{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 16300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001226993865030675, "grad_norm": 0.056844476610422134, "learning_rate": 0.0, "loss": 1.5532171726226807, "step": 1 }, { "epoch": 0.00245398773006135, "grad_norm": 0.05398453772068024, "learning_rate": 7.668711656441719e-08, "loss": 1.5773115158081055, "step": 2 }, { "epoch": 0.0036809815950920245, "grad_norm": 0.06344727426767349, "learning_rate": 1.5337423312883438e-07, "loss": 1.3505765199661255, "step": 3 }, { "epoch": 0.0049079754601227, "grad_norm": 0.06651192903518677, "learning_rate": 2.3006134969325155e-07, "loss": 1.4871529340744019, "step": 4 }, { "epoch": 0.006134969325153374, "grad_norm": 0.06967559456825256, "learning_rate": 3.0674846625766876e-07, "loss": 1.3247398138046265, "step": 5 }, { "epoch": 0.007361963190184049, "grad_norm": 0.05090988427400589, "learning_rate": 3.834355828220859e-07, "loss": 1.4368259906768799, "step": 6 }, { "epoch": 0.008588957055214725, "grad_norm": 0.05639501288533211, "learning_rate": 4.601226993865031e-07, "loss": 1.4667942523956299, "step": 7 }, { "epoch": 0.0098159509202454, "grad_norm": 0.04931354150176048, "learning_rate": 5.368098159509203e-07, "loss": 1.4420663118362427, "step": 8 }, { "epoch": 0.011042944785276074, "grad_norm": 0.06962227076292038, "learning_rate": 6.134969325153375e-07, "loss": 1.3270829916000366, "step": 9 }, { "epoch": 0.012269938650306749, "grad_norm": 0.05681212246417999, "learning_rate": 6.901840490797546e-07, "loss": 1.3849859237670898, "step": 10 }, { "epoch": 0.013496932515337423, "grad_norm": 0.06245573237538338, "learning_rate": 7.668711656441718e-07, "loss": 1.390697956085205, "step": 11 }, { "epoch": 0.014723926380368098, "grad_norm": 0.051362067461013794, "learning_rate": 8.43558282208589e-07, "loss": 1.5184454917907715, "step": 12 }, { "epoch": 0.015950920245398775, "grad_norm": 0.057539232075214386, "learning_rate": 9.202453987730062e-07, "loss": 1.318811297416687, "step": 13 }, { "epoch": 0.01717791411042945, "grad_norm": 0.05592890456318855, "learning_rate": 9.969325153374232e-07, "loss": 1.2885284423828125, "step": 14 }, { "epoch": 0.018404907975460124, "grad_norm": 0.07110682129859924, "learning_rate": 1.0736196319018406e-06, "loss": 1.3424336910247803, "step": 15 }, { "epoch": 0.0196319018404908, "grad_norm": 0.05588489770889282, "learning_rate": 1.1503067484662577e-06, "loss": 1.4609296321868896, "step": 16 }, { "epoch": 0.020858895705521473, "grad_norm": 0.05414435639977455, "learning_rate": 1.226993865030675e-06, "loss": 1.346307635307312, "step": 17 }, { "epoch": 0.022085889570552148, "grad_norm": 0.05896000191569328, "learning_rate": 1.303680981595092e-06, "loss": 1.2913644313812256, "step": 18 }, { "epoch": 0.023312883435582823, "grad_norm": 0.05400668457150459, "learning_rate": 1.3803680981595093e-06, "loss": 1.3340139389038086, "step": 19 }, { "epoch": 0.024539877300613498, "grad_norm": 0.051172543317079544, "learning_rate": 1.4570552147239264e-06, "loss": 1.5014619827270508, "step": 20 }, { "epoch": 0.025766871165644172, "grad_norm": 0.06336528807878494, "learning_rate": 1.5337423312883435e-06, "loss": 1.4562170505523682, "step": 21 }, { "epoch": 0.026993865030674847, "grad_norm": 0.05446509271860123, "learning_rate": 1.6104294478527609e-06, "loss": 1.4978207349777222, "step": 22 }, { "epoch": 0.02822085889570552, "grad_norm": 0.053644318133592606, "learning_rate": 1.687116564417178e-06, "loss": 1.3169641494750977, "step": 23 }, { "epoch": 0.029447852760736196, "grad_norm": 0.059931203722953796, "learning_rate": 1.763803680981595e-06, "loss": 1.388343095779419, "step": 24 }, { "epoch": 0.03067484662576687, "grad_norm": 0.05717086419463158, "learning_rate": 1.8404907975460124e-06, "loss": 1.5201928615570068, "step": 25 }, { "epoch": 0.03190184049079755, "grad_norm": 0.07009559869766235, "learning_rate": 1.9171779141104296e-06, "loss": 1.4164372682571411, "step": 26 }, { "epoch": 0.033128834355828224, "grad_norm": 0.07497748732566833, "learning_rate": 1.9938650306748465e-06, "loss": 1.525839924812317, "step": 27 }, { "epoch": 0.0343558282208589, "grad_norm": 0.05568641796708107, "learning_rate": 2.070552147239264e-06, "loss": 1.2813575267791748, "step": 28 }, { "epoch": 0.03558282208588957, "grad_norm": 0.06756455451250076, "learning_rate": 2.147239263803681e-06, "loss": 1.4711202383041382, "step": 29 }, { "epoch": 0.03680981595092025, "grad_norm": 0.05878760293126106, "learning_rate": 2.223926380368098e-06, "loss": 1.436352252960205, "step": 30 }, { "epoch": 0.03803680981595092, "grad_norm": 0.06954875588417053, "learning_rate": 2.3006134969325154e-06, "loss": 1.5035226345062256, "step": 31 }, { "epoch": 0.0392638036809816, "grad_norm": 0.05211090296506882, "learning_rate": 2.3773006134969327e-06, "loss": 1.2772533893585205, "step": 32 }, { "epoch": 0.04049079754601227, "grad_norm": 0.05606016516685486, "learning_rate": 2.45398773006135e-06, "loss": 1.3446189165115356, "step": 33 }, { "epoch": 0.04171779141104295, "grad_norm": 0.06906063854694366, "learning_rate": 2.530674846625767e-06, "loss": 1.5665283203125, "step": 34 }, { "epoch": 0.04294478527607362, "grad_norm": 0.0749950110912323, "learning_rate": 2.607361963190184e-06, "loss": 1.3163764476776123, "step": 35 }, { "epoch": 0.044171779141104296, "grad_norm": 0.06453581154346466, "learning_rate": 2.6840490797546016e-06, "loss": 1.3515610694885254, "step": 36 }, { "epoch": 0.04539877300613497, "grad_norm": 0.06933236867189407, "learning_rate": 2.7607361963190186e-06, "loss": 1.391564130783081, "step": 37 }, { "epoch": 0.046625766871165646, "grad_norm": 0.06258511543273926, "learning_rate": 2.8374233128834355e-06, "loss": 1.3991773128509521, "step": 38 }, { "epoch": 0.04785276073619632, "grad_norm": 0.06230465695261955, "learning_rate": 2.914110429447853e-06, "loss": 1.4381181001663208, "step": 39 }, { "epoch": 0.049079754601226995, "grad_norm": 0.07217125594615936, "learning_rate": 2.99079754601227e-06, "loss": 1.676390290260315, "step": 40 }, { "epoch": 0.05030674846625767, "grad_norm": 0.07924707233905792, "learning_rate": 3.067484662576687e-06, "loss": 1.437603235244751, "step": 41 }, { "epoch": 0.051533742331288344, "grad_norm": 0.07502411305904388, "learning_rate": 3.1441717791411044e-06, "loss": 1.4542899131774902, "step": 42 }, { "epoch": 0.05276073619631902, "grad_norm": 0.08459946513175964, "learning_rate": 3.2208588957055217e-06, "loss": 1.5027896165847778, "step": 43 }, { "epoch": 0.053987730061349694, "grad_norm": 0.06554778665304184, "learning_rate": 3.2975460122699386e-06, "loss": 1.3057382106781006, "step": 44 }, { "epoch": 0.05521472392638037, "grad_norm": 0.06956043839454651, "learning_rate": 3.374233128834356e-06, "loss": 1.3026341199874878, "step": 45 }, { "epoch": 0.05644171779141104, "grad_norm": 0.09831614047288895, "learning_rate": 3.4509202453987733e-06, "loss": 1.357859492301941, "step": 46 }, { "epoch": 0.05766871165644172, "grad_norm": 0.09383076429367065, "learning_rate": 3.52760736196319e-06, "loss": 1.3813140392303467, "step": 47 }, { "epoch": 0.05889570552147239, "grad_norm": 0.07468827068805695, "learning_rate": 3.6042944785276075e-06, "loss": 1.5053529739379883, "step": 48 }, { "epoch": 0.06012269938650307, "grad_norm": 0.07278682291507721, "learning_rate": 3.680981595092025e-06, "loss": 1.3573490381240845, "step": 49 }, { "epoch": 0.06134969325153374, "grad_norm": 0.07713132351636887, "learning_rate": 3.7576687116564418e-06, "loss": 1.389432430267334, "step": 50 }, { "epoch": 0.06257668711656442, "grad_norm": 0.08927592635154724, "learning_rate": 3.834355828220859e-06, "loss": 1.354691982269287, "step": 51 }, { "epoch": 0.0638036809815951, "grad_norm": 0.07328439503908157, "learning_rate": 3.911042944785277e-06, "loss": 1.4615235328674316, "step": 52 }, { "epoch": 0.06503067484662577, "grad_norm": 0.1023353561758995, "learning_rate": 3.987730061349693e-06, "loss": 1.2745380401611328, "step": 53 }, { "epoch": 0.06625766871165645, "grad_norm": 0.09883410483598709, "learning_rate": 4.064417177914111e-06, "loss": 1.536147952079773, "step": 54 }, { "epoch": 0.06748466257668712, "grad_norm": 0.0988430604338646, "learning_rate": 4.141104294478528e-06, "loss": 1.233506679534912, "step": 55 }, { "epoch": 0.0687116564417178, "grad_norm": 0.09460455179214478, "learning_rate": 4.2177914110429445e-06, "loss": 1.4381420612335205, "step": 56 }, { "epoch": 0.06993865030674846, "grad_norm": 0.08306065201759338, "learning_rate": 4.294478527607362e-06, "loss": 1.373732328414917, "step": 57 }, { "epoch": 0.07116564417177915, "grad_norm": 0.09126616269350052, "learning_rate": 4.371165644171779e-06, "loss": 1.3677804470062256, "step": 58 }, { "epoch": 0.07239263803680981, "grad_norm": 0.10076750069856644, "learning_rate": 4.447852760736196e-06, "loss": 1.517656683921814, "step": 59 }, { "epoch": 0.0736196319018405, "grad_norm": 0.09846878796815872, "learning_rate": 4.524539877300614e-06, "loss": 1.4145147800445557, "step": 60 }, { "epoch": 0.07484662576687116, "grad_norm": 0.09265545010566711, "learning_rate": 4.601226993865031e-06, "loss": 1.3576478958129883, "step": 61 }, { "epoch": 0.07607361963190185, "grad_norm": 0.08007940649986267, "learning_rate": 4.6779141104294485e-06, "loss": 1.2068111896514893, "step": 62 }, { "epoch": 0.07730061349693251, "grad_norm": 0.08868364989757538, "learning_rate": 4.7546012269938654e-06, "loss": 1.6644642353057861, "step": 63 }, { "epoch": 0.0785276073619632, "grad_norm": 0.09127894788980484, "learning_rate": 4.831288343558282e-06, "loss": 1.3978849649429321, "step": 64 }, { "epoch": 0.07975460122699386, "grad_norm": 0.08983105421066284, "learning_rate": 4.9079754601227e-06, "loss": 1.340972661972046, "step": 65 }, { "epoch": 0.08098159509202454, "grad_norm": 0.1077636331319809, "learning_rate": 4.984662576687116e-06, "loss": 1.2714447975158691, "step": 66 }, { "epoch": 0.08220858895705521, "grad_norm": 0.08831098675727844, "learning_rate": 5.061349693251534e-06, "loss": 1.3161592483520508, "step": 67 }, { "epoch": 0.0834355828220859, "grad_norm": 0.09379947930574417, "learning_rate": 5.138036809815952e-06, "loss": 1.4364651441574097, "step": 68 }, { "epoch": 0.08466257668711656, "grad_norm": 0.09187101572751999, "learning_rate": 5.214723926380368e-06, "loss": 1.349583387374878, "step": 69 }, { "epoch": 0.08588957055214724, "grad_norm": 0.0946347787976265, "learning_rate": 5.2914110429447855e-06, "loss": 1.4100244045257568, "step": 70 }, { "epoch": 0.08711656441717791, "grad_norm": 0.10264250636100769, "learning_rate": 5.368098159509203e-06, "loss": 1.3347933292388916, "step": 71 }, { "epoch": 0.08834355828220859, "grad_norm": 0.11045140773057938, "learning_rate": 5.444785276073619e-06, "loss": 1.274242877960205, "step": 72 }, { "epoch": 0.08957055214723926, "grad_norm": 0.10263700783252716, "learning_rate": 5.521472392638037e-06, "loss": 1.4386701583862305, "step": 73 }, { "epoch": 0.09079754601226994, "grad_norm": 0.08373517543077469, "learning_rate": 5.598159509202455e-06, "loss": 1.1995660066604614, "step": 74 }, { "epoch": 0.09202453987730061, "grad_norm": 0.09021089226007462, "learning_rate": 5.674846625766871e-06, "loss": 1.4014225006103516, "step": 75 }, { "epoch": 0.09325153374233129, "grad_norm": 0.09492361545562744, "learning_rate": 5.751533742331289e-06, "loss": 1.2872931957244873, "step": 76 }, { "epoch": 0.09447852760736196, "grad_norm": 0.09280592948198318, "learning_rate": 5.828220858895706e-06, "loss": 1.242187261581421, "step": 77 }, { "epoch": 0.09570552147239264, "grad_norm": 0.09655126184225082, "learning_rate": 5.9049079754601225e-06, "loss": 1.347137451171875, "step": 78 }, { "epoch": 0.09693251533742331, "grad_norm": 0.09948047995567322, "learning_rate": 5.98159509202454e-06, "loss": 1.4971836805343628, "step": 79 }, { "epoch": 0.09815950920245399, "grad_norm": 0.10510187596082687, "learning_rate": 6.058282208588957e-06, "loss": 1.2555480003356934, "step": 80 }, { "epoch": 0.09938650306748466, "grad_norm": 0.10650225728750229, "learning_rate": 6.134969325153374e-06, "loss": 1.4250295162200928, "step": 81 }, { "epoch": 0.10061349693251534, "grad_norm": 0.09000247716903687, "learning_rate": 6.211656441717792e-06, "loss": 1.354022741317749, "step": 82 }, { "epoch": 0.10184049079754601, "grad_norm": 0.09123768657445908, "learning_rate": 6.288343558282209e-06, "loss": 1.3554480075836182, "step": 83 }, { "epoch": 0.10306748466257669, "grad_norm": 0.09282216429710388, "learning_rate": 6.365030674846626e-06, "loss": 1.2707648277282715, "step": 84 }, { "epoch": 0.10429447852760736, "grad_norm": 0.09543449431657791, "learning_rate": 6.4417177914110434e-06, "loss": 1.2168090343475342, "step": 85 }, { "epoch": 0.10552147239263804, "grad_norm": 0.09843157976865768, "learning_rate": 6.51840490797546e-06, "loss": 1.260111927986145, "step": 86 }, { "epoch": 0.1067484662576687, "grad_norm": 0.11217689514160156, "learning_rate": 6.595092024539877e-06, "loss": 1.3698742389678955, "step": 87 }, { "epoch": 0.10797546012269939, "grad_norm": 0.09749456495046616, "learning_rate": 6.671779141104295e-06, "loss": 1.3580715656280518, "step": 88 }, { "epoch": 0.10920245398773006, "grad_norm": 0.10477589815855026, "learning_rate": 6.748466257668712e-06, "loss": 1.3862428665161133, "step": 89 }, { "epoch": 0.11042944785276074, "grad_norm": 0.10288488119840622, "learning_rate": 6.825153374233129e-06, "loss": 1.265109658241272, "step": 90 }, { "epoch": 0.1116564417177914, "grad_norm": 0.09684387594461441, "learning_rate": 6.901840490797547e-06, "loss": 1.323692798614502, "step": 91 }, { "epoch": 0.11288343558282209, "grad_norm": 0.10333628952503204, "learning_rate": 6.9785276073619635e-06, "loss": 1.389283537864685, "step": 92 }, { "epoch": 0.11411042944785275, "grad_norm": 0.10879465192556381, "learning_rate": 7.05521472392638e-06, "loss": 1.395522117614746, "step": 93 }, { "epoch": 0.11533742331288344, "grad_norm": 0.09067656099796295, "learning_rate": 7.131901840490798e-06, "loss": 1.17533540725708, "step": 94 }, { "epoch": 0.1165644171779141, "grad_norm": 0.09927186369895935, "learning_rate": 7.208588957055215e-06, "loss": 1.3598401546478271, "step": 95 }, { "epoch": 0.11779141104294479, "grad_norm": 0.1038227528333664, "learning_rate": 7.285276073619632e-06, "loss": 1.392061471939087, "step": 96 }, { "epoch": 0.11901840490797547, "grad_norm": 0.09537981450557709, "learning_rate": 7.36196319018405e-06, "loss": 1.2397699356079102, "step": 97 }, { "epoch": 0.12024539877300613, "grad_norm": 0.08968851715326309, "learning_rate": 7.438650306748467e-06, "loss": 1.295367956161499, "step": 98 }, { "epoch": 0.12147239263803682, "grad_norm": 0.10279387980699539, "learning_rate": 7.5153374233128836e-06, "loss": 1.2284561395645142, "step": 99 }, { "epoch": 0.12269938650306748, "grad_norm": 0.09359531849622726, "learning_rate": 7.592024539877301e-06, "loss": 1.2822641134262085, "step": 100 }, { "epoch": 0.12392638036809817, "grad_norm": 0.07995408773422241, "learning_rate": 7.668711656441718e-06, "loss": 1.2809827327728271, "step": 101 }, { "epoch": 0.12515337423312883, "grad_norm": 0.08832208067178726, "learning_rate": 7.745398773006135e-06, "loss": 1.3367336988449097, "step": 102 }, { "epoch": 0.1263803680981595, "grad_norm": 0.09752494096755981, "learning_rate": 7.822085889570554e-06, "loss": 1.2547138929367065, "step": 103 }, { "epoch": 0.1276073619631902, "grad_norm": 0.0881728082895279, "learning_rate": 7.89877300613497e-06, "loss": 1.3664880990982056, "step": 104 }, { "epoch": 0.12883435582822086, "grad_norm": 0.0837954729795456, "learning_rate": 7.975460122699386e-06, "loss": 1.217215895652771, "step": 105 }, { "epoch": 0.13006134969325153, "grad_norm": 0.09412740916013718, "learning_rate": 8.052147239263803e-06, "loss": 1.381158709526062, "step": 106 }, { "epoch": 0.1312883435582822, "grad_norm": 0.08788621425628662, "learning_rate": 8.128834355828221e-06, "loss": 1.1602442264556885, "step": 107 }, { "epoch": 0.1325153374233129, "grad_norm": 0.07725191116333008, "learning_rate": 8.205521472392638e-06, "loss": 1.263871669769287, "step": 108 }, { "epoch": 0.13374233128834356, "grad_norm": 0.086419977247715, "learning_rate": 8.282208588957055e-06, "loss": 1.3760960102081299, "step": 109 }, { "epoch": 0.13496932515337423, "grad_norm": 0.09353740513324738, "learning_rate": 8.358895705521474e-06, "loss": 1.3359466791152954, "step": 110 }, { "epoch": 0.1361963190184049, "grad_norm": 0.07447539269924164, "learning_rate": 8.435582822085889e-06, "loss": 1.2698533535003662, "step": 111 }, { "epoch": 0.1374233128834356, "grad_norm": 0.10644813627004623, "learning_rate": 8.512269938650306e-06, "loss": 1.1220934391021729, "step": 112 }, { "epoch": 0.13865030674846626, "grad_norm": 0.06901716440916061, "learning_rate": 8.588957055214725e-06, "loss": 1.1655170917510986, "step": 113 }, { "epoch": 0.13987730061349693, "grad_norm": 0.0855763703584671, "learning_rate": 8.665644171779141e-06, "loss": 1.1486337184906006, "step": 114 }, { "epoch": 0.1411042944785276, "grad_norm": 0.09067295491695404, "learning_rate": 8.742331288343558e-06, "loss": 1.1816922426223755, "step": 115 }, { "epoch": 0.1423312883435583, "grad_norm": 0.0844568982720375, "learning_rate": 8.819018404907977e-06, "loss": 1.215814232826233, "step": 116 }, { "epoch": 0.14355828220858896, "grad_norm": 0.06900807470083237, "learning_rate": 8.895705521472392e-06, "loss": 1.2119998931884766, "step": 117 }, { "epoch": 0.14478527607361963, "grad_norm": 0.07901526242494583, "learning_rate": 8.972392638036809e-06, "loss": 1.2455894947052002, "step": 118 }, { "epoch": 0.1460122699386503, "grad_norm": 0.07439431548118591, "learning_rate": 9.049079754601228e-06, "loss": 1.2518129348754883, "step": 119 }, { "epoch": 0.147239263803681, "grad_norm": 0.08350270241498947, "learning_rate": 9.125766871165645e-06, "loss": 1.2978299856185913, "step": 120 }, { "epoch": 0.14846625766871166, "grad_norm": 0.07621041685342789, "learning_rate": 9.202453987730062e-06, "loss": 1.2295458316802979, "step": 121 }, { "epoch": 0.14969325153374233, "grad_norm": 0.08300689607858658, "learning_rate": 9.27914110429448e-06, "loss": 1.3470526933670044, "step": 122 }, { "epoch": 0.150920245398773, "grad_norm": 0.0724540650844574, "learning_rate": 9.355828220858897e-06, "loss": 1.1989648342132568, "step": 123 }, { "epoch": 0.1521472392638037, "grad_norm": 0.07644130289554596, "learning_rate": 9.432515337423312e-06, "loss": 1.2179319858551025, "step": 124 }, { "epoch": 0.15337423312883436, "grad_norm": 0.07522676140069962, "learning_rate": 9.509202453987731e-06, "loss": 1.2217178344726562, "step": 125 }, { "epoch": 0.15460122699386503, "grad_norm": 0.07084149122238159, "learning_rate": 9.585889570552148e-06, "loss": 1.1610798835754395, "step": 126 }, { "epoch": 0.1558282208588957, "grad_norm": 0.07349451631307602, "learning_rate": 9.662576687116565e-06, "loss": 1.2103276252746582, "step": 127 }, { "epoch": 0.1570552147239264, "grad_norm": 0.07179653644561768, "learning_rate": 9.739263803680983e-06, "loss": 1.247308373451233, "step": 128 }, { "epoch": 0.15828220858895706, "grad_norm": 0.0752854123711586, "learning_rate": 9.8159509202454e-06, "loss": 1.2092695236206055, "step": 129 }, { "epoch": 0.15950920245398773, "grad_norm": 0.06951854377985, "learning_rate": 9.892638036809815e-06, "loss": 1.1004083156585693, "step": 130 }, { "epoch": 0.1607361963190184, "grad_norm": 0.07509293407201767, "learning_rate": 9.969325153374232e-06, "loss": 1.2282463312149048, "step": 131 }, { "epoch": 0.1619631901840491, "grad_norm": 0.07161499559879303, "learning_rate": 1.0046012269938651e-05, "loss": 1.312925100326538, "step": 132 }, { "epoch": 0.16319018404907976, "grad_norm": 0.07607123255729675, "learning_rate": 1.0122699386503068e-05, "loss": 1.3124730587005615, "step": 133 }, { "epoch": 0.16441717791411042, "grad_norm": 0.07267063856124878, "learning_rate": 1.0199386503067485e-05, "loss": 1.1374307870864868, "step": 134 }, { "epoch": 0.1656441717791411, "grad_norm": 0.07660125941038132, "learning_rate": 1.0276073619631903e-05, "loss": 1.1595758199691772, "step": 135 }, { "epoch": 0.1668711656441718, "grad_norm": 0.07634468376636505, "learning_rate": 1.0352760736196319e-05, "loss": 1.1585400104522705, "step": 136 }, { "epoch": 0.16809815950920245, "grad_norm": 0.07540547847747803, "learning_rate": 1.0429447852760736e-05, "loss": 1.2559616565704346, "step": 137 }, { "epoch": 0.16932515337423312, "grad_norm": 0.06400100141763687, "learning_rate": 1.0506134969325154e-05, "loss": 1.2289286851882935, "step": 138 }, { "epoch": 0.1705521472392638, "grad_norm": 0.08367716521024704, "learning_rate": 1.0582822085889571e-05, "loss": 1.169529676437378, "step": 139 }, { "epoch": 0.17177914110429449, "grad_norm": 0.07152585685253143, "learning_rate": 1.0659509202453988e-05, "loss": 1.2492527961730957, "step": 140 }, { "epoch": 0.17300613496932515, "grad_norm": 0.07628720253705978, "learning_rate": 1.0736196319018407e-05, "loss": 1.169081687927246, "step": 141 }, { "epoch": 0.17423312883435582, "grad_norm": 0.07779771089553833, "learning_rate": 1.0812883435582823e-05, "loss": 1.294127106666565, "step": 142 }, { "epoch": 0.1754601226993865, "grad_norm": 0.0831819698214531, "learning_rate": 1.0889570552147239e-05, "loss": 1.191867470741272, "step": 143 }, { "epoch": 0.17668711656441718, "grad_norm": 0.07639773935079575, "learning_rate": 1.0966257668711657e-05, "loss": 1.1590148210525513, "step": 144 }, { "epoch": 0.17791411042944785, "grad_norm": 0.06675959378480911, "learning_rate": 1.1042944785276074e-05, "loss": 1.1139436960220337, "step": 145 }, { "epoch": 0.17914110429447852, "grad_norm": 0.07247823476791382, "learning_rate": 1.1119631901840491e-05, "loss": 1.1570465564727783, "step": 146 }, { "epoch": 0.18036809815950922, "grad_norm": 0.0793304368853569, "learning_rate": 1.119631901840491e-05, "loss": 1.3373682498931885, "step": 147 }, { "epoch": 0.18159509202453988, "grad_norm": 0.08718875050544739, "learning_rate": 1.1273006134969327e-05, "loss": 1.1467812061309814, "step": 148 }, { "epoch": 0.18282208588957055, "grad_norm": 0.07898775488138199, "learning_rate": 1.1349693251533742e-05, "loss": 1.1690555810928345, "step": 149 }, { "epoch": 0.18404907975460122, "grad_norm": 0.0748438760638237, "learning_rate": 1.142638036809816e-05, "loss": 1.286328673362732, "step": 150 }, { "epoch": 0.18527607361963191, "grad_norm": 0.07474487274885178, "learning_rate": 1.1503067484662577e-05, "loss": 1.2229455709457397, "step": 151 }, { "epoch": 0.18650306748466258, "grad_norm": 0.07763181626796722, "learning_rate": 1.1579754601226994e-05, "loss": 1.131850242614746, "step": 152 }, { "epoch": 0.18773006134969325, "grad_norm": 0.07221898436546326, "learning_rate": 1.1656441717791411e-05, "loss": 1.1305968761444092, "step": 153 }, { "epoch": 0.18895705521472392, "grad_norm": 0.07231416553258896, "learning_rate": 1.173312883435583e-05, "loss": 1.2581983804702759, "step": 154 }, { "epoch": 0.1901840490797546, "grad_norm": 0.0842069461941719, "learning_rate": 1.1809815950920245e-05, "loss": 1.1937918663024902, "step": 155 }, { "epoch": 0.19141104294478528, "grad_norm": 0.07470091432332993, "learning_rate": 1.1886503067484662e-05, "loss": 1.1321823596954346, "step": 156 }, { "epoch": 0.19263803680981595, "grad_norm": 0.07048668712377548, "learning_rate": 1.196319018404908e-05, "loss": 1.186060905456543, "step": 157 }, { "epoch": 0.19386503067484662, "grad_norm": 0.15513236820697784, "learning_rate": 1.2039877300613497e-05, "loss": 1.1082484722137451, "step": 158 }, { "epoch": 0.1950920245398773, "grad_norm": 0.07174337655305862, "learning_rate": 1.2116564417177914e-05, "loss": 1.0334205627441406, "step": 159 }, { "epoch": 0.19631901840490798, "grad_norm": 0.08637174218893051, "learning_rate": 1.2193251533742333e-05, "loss": 1.2800869941711426, "step": 160 }, { "epoch": 0.19754601226993865, "grad_norm": 0.07679158449172974, "learning_rate": 1.2269938650306748e-05, "loss": 1.0969552993774414, "step": 161 }, { "epoch": 0.19877300613496932, "grad_norm": 0.07408922910690308, "learning_rate": 1.2346625766871165e-05, "loss": 1.0420407056808472, "step": 162 }, { "epoch": 0.2, "grad_norm": 0.07723739743232727, "learning_rate": 1.2423312883435584e-05, "loss": 1.2170075178146362, "step": 163 }, { "epoch": 0.20122699386503068, "grad_norm": 0.09736181795597076, "learning_rate": 1.25e-05, "loss": 1.092185139656067, "step": 164 }, { "epoch": 0.20245398773006135, "grad_norm": 0.08557657897472382, "learning_rate": 1.2576687116564418e-05, "loss": 1.1464152336120605, "step": 165 }, { "epoch": 0.20368098159509201, "grad_norm": 0.08090342581272125, "learning_rate": 1.2653374233128834e-05, "loss": 1.0590012073516846, "step": 166 }, { "epoch": 0.2049079754601227, "grad_norm": 0.0782066136598587, "learning_rate": 1.2730061349693251e-05, "loss": 1.093062162399292, "step": 167 }, { "epoch": 0.20613496932515338, "grad_norm": 0.07601075619459152, "learning_rate": 1.280674846625767e-05, "loss": 1.0593127012252808, "step": 168 }, { "epoch": 0.20736196319018405, "grad_norm": 0.07887541502714157, "learning_rate": 1.2883435582822087e-05, "loss": 1.1101746559143066, "step": 169 }, { "epoch": 0.2085889570552147, "grad_norm": 0.08362209796905518, "learning_rate": 1.2960122699386504e-05, "loss": 1.127945065498352, "step": 170 }, { "epoch": 0.2098159509202454, "grad_norm": 0.07187578082084656, "learning_rate": 1.303680981595092e-05, "loss": 0.9966148138046265, "step": 171 }, { "epoch": 0.21104294478527608, "grad_norm": 0.08967337757349014, "learning_rate": 1.3113496932515338e-05, "loss": 1.2105052471160889, "step": 172 }, { "epoch": 0.21226993865030674, "grad_norm": 0.08062509447336197, "learning_rate": 1.3190184049079754e-05, "loss": 1.124169111251831, "step": 173 }, { "epoch": 0.2134969325153374, "grad_norm": 0.08591330051422119, "learning_rate": 1.3266871165644173e-05, "loss": 1.2784122228622437, "step": 174 }, { "epoch": 0.2147239263803681, "grad_norm": 0.07611163705587387, "learning_rate": 1.334355828220859e-05, "loss": 1.3006565570831299, "step": 175 }, { "epoch": 0.21595092024539878, "grad_norm": 0.09474623948335648, "learning_rate": 1.3420245398773007e-05, "loss": 1.0671237707138062, "step": 176 }, { "epoch": 0.21717791411042944, "grad_norm": 0.08161427825689316, "learning_rate": 1.3496932515337424e-05, "loss": 1.1499321460723877, "step": 177 }, { "epoch": 0.2184049079754601, "grad_norm": 0.0793696790933609, "learning_rate": 1.357361963190184e-05, "loss": 1.1806647777557373, "step": 178 }, { "epoch": 0.2196319018404908, "grad_norm": 0.07412323355674744, "learning_rate": 1.3650306748466258e-05, "loss": 1.1109105348587036, "step": 179 }, { "epoch": 0.22085889570552147, "grad_norm": 0.09603255242109299, "learning_rate": 1.3726993865030676e-05, "loss": 1.0471618175506592, "step": 180 }, { "epoch": 0.22208588957055214, "grad_norm": 0.08905616402626038, "learning_rate": 1.3803680981595093e-05, "loss": 1.1427825689315796, "step": 181 }, { "epoch": 0.2233128834355828, "grad_norm": 0.09254536777734756, "learning_rate": 1.388036809815951e-05, "loss": 1.1951453685760498, "step": 182 }, { "epoch": 0.2245398773006135, "grad_norm": 0.08542929589748383, "learning_rate": 1.3957055214723927e-05, "loss": 1.1082501411437988, "step": 183 }, { "epoch": 0.22576687116564417, "grad_norm": 0.08667684346437454, "learning_rate": 1.4033742331288344e-05, "loss": 1.172175645828247, "step": 184 }, { "epoch": 0.22699386503067484, "grad_norm": 0.08812960237264633, "learning_rate": 1.411042944785276e-05, "loss": 1.113857626914978, "step": 185 }, { "epoch": 0.2282208588957055, "grad_norm": 0.08759750425815582, "learning_rate": 1.418711656441718e-05, "loss": 1.0720980167388916, "step": 186 }, { "epoch": 0.2294478527607362, "grad_norm": 0.07663634419441223, "learning_rate": 1.4263803680981596e-05, "loss": 1.1362175941467285, "step": 187 }, { "epoch": 0.23067484662576687, "grad_norm": 0.0884539932012558, "learning_rate": 1.4340490797546013e-05, "loss": 1.030583381652832, "step": 188 }, { "epoch": 0.23190184049079754, "grad_norm": 0.07207372039556503, "learning_rate": 1.441717791411043e-05, "loss": 1.1791778802871704, "step": 189 }, { "epoch": 0.2331288343558282, "grad_norm": 0.09309247136116028, "learning_rate": 1.4493865030674847e-05, "loss": 1.1558406352996826, "step": 190 }, { "epoch": 0.2343558282208589, "grad_norm": 0.08264083415269852, "learning_rate": 1.4570552147239264e-05, "loss": 1.1436799764633179, "step": 191 }, { "epoch": 0.23558282208588957, "grad_norm": 0.08998411893844604, "learning_rate": 1.4647239263803681e-05, "loss": 0.9984644055366516, "step": 192 }, { "epoch": 0.23680981595092024, "grad_norm": 0.09304746985435486, "learning_rate": 1.47239263803681e-05, "loss": 1.1322174072265625, "step": 193 }, { "epoch": 0.23803680981595093, "grad_norm": 0.09598280489444733, "learning_rate": 1.4800613496932516e-05, "loss": 1.103399634361267, "step": 194 }, { "epoch": 0.2392638036809816, "grad_norm": 0.09808170050382614, "learning_rate": 1.4877300613496933e-05, "loss": 1.1747708320617676, "step": 195 }, { "epoch": 0.24049079754601227, "grad_norm": 0.08757835626602173, "learning_rate": 1.495398773006135e-05, "loss": 1.135438323020935, "step": 196 }, { "epoch": 0.24171779141104294, "grad_norm": 0.08952710032463074, "learning_rate": 1.5030674846625767e-05, "loss": 1.0426082611083984, "step": 197 }, { "epoch": 0.24294478527607363, "grad_norm": 0.09966392815113068, "learning_rate": 1.5107361963190184e-05, "loss": 1.2231348752975464, "step": 198 }, { "epoch": 0.2441717791411043, "grad_norm": 0.08228705823421478, "learning_rate": 1.5184049079754603e-05, "loss": 1.0428478717803955, "step": 199 }, { "epoch": 0.24539877300613497, "grad_norm": 0.09374398738145828, "learning_rate": 1.526073619631902e-05, "loss": 1.057676076889038, "step": 200 }, { "epoch": 0.24662576687116564, "grad_norm": 0.08426011353731155, "learning_rate": 1.5337423312883436e-05, "loss": 1.1756813526153564, "step": 201 }, { "epoch": 0.24785276073619633, "grad_norm": 0.08629194647073746, "learning_rate": 1.5414110429447852e-05, "loss": 1.1612563133239746, "step": 202 }, { "epoch": 0.249079754601227, "grad_norm": 0.1029244139790535, "learning_rate": 1.549079754601227e-05, "loss": 1.0064386129379272, "step": 203 }, { "epoch": 0.25030674846625767, "grad_norm": 0.08551669865846634, "learning_rate": 1.5567484662576686e-05, "loss": 1.2138303518295288, "step": 204 }, { "epoch": 0.25153374233128833, "grad_norm": 0.08521132916212082, "learning_rate": 1.5644171779141108e-05, "loss": 1.0655517578125, "step": 205 }, { "epoch": 0.252760736196319, "grad_norm": 0.0902361199259758, "learning_rate": 1.5720858895705523e-05, "loss": 1.1724493503570557, "step": 206 }, { "epoch": 0.25398773006134967, "grad_norm": 0.0905335545539856, "learning_rate": 1.579754601226994e-05, "loss": 1.0633145570755005, "step": 207 }, { "epoch": 0.2552147239263804, "grad_norm": 0.10272892564535141, "learning_rate": 1.5874233128834357e-05, "loss": 1.034580111503601, "step": 208 }, { "epoch": 0.25644171779141106, "grad_norm": 0.08878074586391449, "learning_rate": 1.5950920245398772e-05, "loss": 1.1094441413879395, "step": 209 }, { "epoch": 0.25766871165644173, "grad_norm": 0.09403765946626663, "learning_rate": 1.602760736196319e-05, "loss": 1.0656667947769165, "step": 210 }, { "epoch": 0.2588957055214724, "grad_norm": 0.10467862337827682, "learning_rate": 1.6104294478527606e-05, "loss": 1.1456003189086914, "step": 211 }, { "epoch": 0.26012269938650306, "grad_norm": 0.09688597917556763, "learning_rate": 1.6180981595092028e-05, "loss": 1.124730110168457, "step": 212 }, { "epoch": 0.26134969325153373, "grad_norm": 0.09323239326477051, "learning_rate": 1.6257668711656443e-05, "loss": 1.1329385042190552, "step": 213 }, { "epoch": 0.2625766871165644, "grad_norm": 0.10989087074995041, "learning_rate": 1.633435582822086e-05, "loss": 1.2534266710281372, "step": 214 }, { "epoch": 0.26380368098159507, "grad_norm": 0.10287831723690033, "learning_rate": 1.6411042944785277e-05, "loss": 1.0936100482940674, "step": 215 }, { "epoch": 0.2650306748466258, "grad_norm": 0.09896902740001678, "learning_rate": 1.6487730061349692e-05, "loss": 1.0307230949401855, "step": 216 }, { "epoch": 0.26625766871165646, "grad_norm": 0.10131006687879562, "learning_rate": 1.656441717791411e-05, "loss": 1.1618880033493042, "step": 217 }, { "epoch": 0.2674846625766871, "grad_norm": 0.10399206727743149, "learning_rate": 1.664110429447853e-05, "loss": 1.1229549646377563, "step": 218 }, { "epoch": 0.2687116564417178, "grad_norm": 0.10053529590368271, "learning_rate": 1.6717791411042948e-05, "loss": 1.1616917848587036, "step": 219 }, { "epoch": 0.26993865030674846, "grad_norm": 0.09529908001422882, "learning_rate": 1.6794478527607363e-05, "loss": 1.0340030193328857, "step": 220 }, { "epoch": 0.27116564417177913, "grad_norm": 0.1017000749707222, "learning_rate": 1.6871165644171778e-05, "loss": 1.0947508811950684, "step": 221 }, { "epoch": 0.2723926380368098, "grad_norm": 0.08638525754213333, "learning_rate": 1.6947852760736197e-05, "loss": 0.9798234701156616, "step": 222 }, { "epoch": 0.27361963190184047, "grad_norm": 0.10133469104766846, "learning_rate": 1.7024539877300612e-05, "loss": 1.0582237243652344, "step": 223 }, { "epoch": 0.2748466257668712, "grad_norm": 0.10113056749105453, "learning_rate": 1.7101226993865034e-05, "loss": 1.066800832748413, "step": 224 }, { "epoch": 0.27607361963190186, "grad_norm": 0.11324694007635117, "learning_rate": 1.717791411042945e-05, "loss": 1.1267540454864502, "step": 225 }, { "epoch": 0.2773006134969325, "grad_norm": 0.0964084267616272, "learning_rate": 1.7254601226993868e-05, "loss": 1.0880162715911865, "step": 226 }, { "epoch": 0.2785276073619632, "grad_norm": 0.10354617238044739, "learning_rate": 1.7331288343558283e-05, "loss": 1.150836706161499, "step": 227 }, { "epoch": 0.27975460122699386, "grad_norm": 0.10236093401908875, "learning_rate": 1.7407975460122698e-05, "loss": 0.9434213042259216, "step": 228 }, { "epoch": 0.2809815950920245, "grad_norm": 0.09847301989793777, "learning_rate": 1.7484662576687117e-05, "loss": 1.0203105211257935, "step": 229 }, { "epoch": 0.2822085889570552, "grad_norm": 0.1049259603023529, "learning_rate": 1.7561349693251535e-05, "loss": 1.0045392513275146, "step": 230 }, { "epoch": 0.28343558282208586, "grad_norm": 0.09362269192934036, "learning_rate": 1.7638036809815954e-05, "loss": 0.9903603196144104, "step": 231 }, { "epoch": 0.2846625766871166, "grad_norm": 0.08344310522079468, "learning_rate": 1.771472392638037e-05, "loss": 1.078027367591858, "step": 232 }, { "epoch": 0.28588957055214725, "grad_norm": 0.14989130198955536, "learning_rate": 1.7791411042944784e-05, "loss": 0.8905891180038452, "step": 233 }, { "epoch": 0.2871165644171779, "grad_norm": 0.11933330446481705, "learning_rate": 1.7868098159509203e-05, "loss": 1.0191709995269775, "step": 234 }, { "epoch": 0.2883435582822086, "grad_norm": 0.09907590597867966, "learning_rate": 1.7944785276073618e-05, "loss": 0.97510826587677, "step": 235 }, { "epoch": 0.28957055214723926, "grad_norm": 0.11432763934135437, "learning_rate": 1.8021472392638037e-05, "loss": 1.2066056728363037, "step": 236 }, { "epoch": 0.2907975460122699, "grad_norm": 0.09880590438842773, "learning_rate": 1.8098159509202455e-05, "loss": 1.0608339309692383, "step": 237 }, { "epoch": 0.2920245398773006, "grad_norm": 0.09431140124797821, "learning_rate": 1.8174846625766874e-05, "loss": 0.994207501411438, "step": 238 }, { "epoch": 0.29325153374233126, "grad_norm": 0.11933495849370956, "learning_rate": 1.825153374233129e-05, "loss": 0.9558770656585693, "step": 239 }, { "epoch": 0.294478527607362, "grad_norm": 0.11472880095243454, "learning_rate": 1.8328220858895704e-05, "loss": 1.0349183082580566, "step": 240 }, { "epoch": 0.29570552147239265, "grad_norm": 0.09867944568395615, "learning_rate": 1.8404907975460123e-05, "loss": 1.0970228910446167, "step": 241 }, { "epoch": 0.2969325153374233, "grad_norm": 0.10022281855344772, "learning_rate": 1.848159509202454e-05, "loss": 1.093743920326233, "step": 242 }, { "epoch": 0.298159509202454, "grad_norm": 0.12287919968366623, "learning_rate": 1.855828220858896e-05, "loss": 1.1307411193847656, "step": 243 }, { "epoch": 0.29938650306748466, "grad_norm": 0.09696297347545624, "learning_rate": 1.8634969325153376e-05, "loss": 1.1624317169189453, "step": 244 }, { "epoch": 0.3006134969325153, "grad_norm": 0.11268558353185654, "learning_rate": 1.8711656441717794e-05, "loss": 1.1427913904190063, "step": 245 }, { "epoch": 0.301840490797546, "grad_norm": 0.09550510346889496, "learning_rate": 1.878834355828221e-05, "loss": 1.0877418518066406, "step": 246 }, { "epoch": 0.3030674846625767, "grad_norm": 0.09527410566806793, "learning_rate": 1.8865030674846625e-05, "loss": 1.0739914178848267, "step": 247 }, { "epoch": 0.3042944785276074, "grad_norm": 0.14008916914463043, "learning_rate": 1.8941717791411043e-05, "loss": 0.9480798244476318, "step": 248 }, { "epoch": 0.30552147239263805, "grad_norm": 0.1293039619922638, "learning_rate": 1.9018404907975462e-05, "loss": 0.992428183555603, "step": 249 }, { "epoch": 0.3067484662576687, "grad_norm": 0.11473323404788971, "learning_rate": 1.909509202453988e-05, "loss": 1.0926830768585205, "step": 250 }, { "epoch": 0.3079754601226994, "grad_norm": 0.09950664639472961, "learning_rate": 1.9171779141104296e-05, "loss": 1.2386255264282227, "step": 251 }, { "epoch": 0.30920245398773005, "grad_norm": 0.10663459450006485, "learning_rate": 1.924846625766871e-05, "loss": 1.137174129486084, "step": 252 }, { "epoch": 0.3104294478527607, "grad_norm": 0.12015614658594131, "learning_rate": 1.932515337423313e-05, "loss": 1.018200397491455, "step": 253 }, { "epoch": 0.3116564417177914, "grad_norm": 0.12178294360637665, "learning_rate": 1.9401840490797545e-05, "loss": 1.03646981716156, "step": 254 }, { "epoch": 0.3128834355828221, "grad_norm": 0.09648360311985016, "learning_rate": 1.9478527607361967e-05, "loss": 1.1479485034942627, "step": 255 }, { "epoch": 0.3141104294478528, "grad_norm": 0.10974877327680588, "learning_rate": 1.9555214723926382e-05, "loss": 1.1105828285217285, "step": 256 }, { "epoch": 0.31533742331288345, "grad_norm": 0.13123326003551483, "learning_rate": 1.96319018404908e-05, "loss": 1.0254418849945068, "step": 257 }, { "epoch": 0.3165644171779141, "grad_norm": 0.12058497965335846, "learning_rate": 1.9708588957055216e-05, "loss": 0.8520700335502625, "step": 258 }, { "epoch": 0.3177914110429448, "grad_norm": 0.10527869313955307, "learning_rate": 1.978527607361963e-05, "loss": 1.057407021522522, "step": 259 }, { "epoch": 0.31901840490797545, "grad_norm": 0.12831807136535645, "learning_rate": 1.986196319018405e-05, "loss": 0.9605754017829895, "step": 260 }, { "epoch": 0.3202453987730061, "grad_norm": 0.10611746460199356, "learning_rate": 1.9938650306748465e-05, "loss": 1.121097445487976, "step": 261 }, { "epoch": 0.3214723926380368, "grad_norm": 0.10287187248468399, "learning_rate": 2.0015337423312887e-05, "loss": 1.0894691944122314, "step": 262 }, { "epoch": 0.3226993865030675, "grad_norm": 0.11541418731212616, "learning_rate": 2.0092024539877302e-05, "loss": 0.9634550213813782, "step": 263 }, { "epoch": 0.3239263803680982, "grad_norm": 0.13788478076457977, "learning_rate": 2.016871165644172e-05, "loss": 1.0188679695129395, "step": 264 }, { "epoch": 0.32515337423312884, "grad_norm": 0.12389227002859116, "learning_rate": 2.0245398773006136e-05, "loss": 1.0685219764709473, "step": 265 }, { "epoch": 0.3263803680981595, "grad_norm": 0.1358790248632431, "learning_rate": 2.032208588957055e-05, "loss": 0.9753469228744507, "step": 266 }, { "epoch": 0.3276073619631902, "grad_norm": 0.1116497740149498, "learning_rate": 2.039877300613497e-05, "loss": 0.957251787185669, "step": 267 }, { "epoch": 0.32883435582822085, "grad_norm": 0.11138273775577545, "learning_rate": 2.0475460122699388e-05, "loss": 0.9344768524169922, "step": 268 }, { "epoch": 0.3300613496932515, "grad_norm": 0.1283080279827118, "learning_rate": 2.0552147239263807e-05, "loss": 1.0072486400604248, "step": 269 }, { "epoch": 0.3312883435582822, "grad_norm": 0.1293177753686905, "learning_rate": 2.0628834355828222e-05, "loss": 0.9404339790344238, "step": 270 }, { "epoch": 0.3325153374233129, "grad_norm": 0.10337118804454803, "learning_rate": 2.0705521472392637e-05, "loss": 0.9937950372695923, "step": 271 }, { "epoch": 0.3337423312883436, "grad_norm": 0.10898245871067047, "learning_rate": 2.0782208588957056e-05, "loss": 1.1300337314605713, "step": 272 }, { "epoch": 0.33496932515337424, "grad_norm": 0.1548905074596405, "learning_rate": 2.085889570552147e-05, "loss": 0.9663505554199219, "step": 273 }, { "epoch": 0.3361963190184049, "grad_norm": 0.12634064257144928, "learning_rate": 2.0935582822085893e-05, "loss": 0.8915446400642395, "step": 274 }, { "epoch": 0.3374233128834356, "grad_norm": 0.09657935798168182, "learning_rate": 2.1012269938650308e-05, "loss": 0.9893382787704468, "step": 275 }, { "epoch": 0.33865030674846625, "grad_norm": 0.17486917972564697, "learning_rate": 2.1088957055214727e-05, "loss": 1.070754051208496, "step": 276 }, { "epoch": 0.3398773006134969, "grad_norm": 0.14501982927322388, "learning_rate": 2.1165644171779142e-05, "loss": 1.1186362504959106, "step": 277 }, { "epoch": 0.3411042944785276, "grad_norm": 0.13628800213336945, "learning_rate": 2.1242331288343557e-05, "loss": 0.9450318217277527, "step": 278 }, { "epoch": 0.3423312883435583, "grad_norm": 0.11076204478740692, "learning_rate": 2.1319018404907976e-05, "loss": 1.005497932434082, "step": 279 }, { "epoch": 0.34355828220858897, "grad_norm": 0.13485747575759888, "learning_rate": 2.1395705521472395e-05, "loss": 0.895561933517456, "step": 280 }, { "epoch": 0.34478527607361964, "grad_norm": 0.12088459730148315, "learning_rate": 2.1472392638036813e-05, "loss": 1.3100700378417969, "step": 281 }, { "epoch": 0.3460122699386503, "grad_norm": 0.11534463614225388, "learning_rate": 2.154907975460123e-05, "loss": 1.11163330078125, "step": 282 }, { "epoch": 0.347239263803681, "grad_norm": 0.12682971358299255, "learning_rate": 2.1625766871165647e-05, "loss": 1.022303581237793, "step": 283 }, { "epoch": 0.34846625766871164, "grad_norm": 0.1394464671611786, "learning_rate": 2.1702453987730062e-05, "loss": 1.1607928276062012, "step": 284 }, { "epoch": 0.3496932515337423, "grad_norm": 0.12041673809289932, "learning_rate": 2.1779141104294477e-05, "loss": 0.9233322739601135, "step": 285 }, { "epoch": 0.350920245398773, "grad_norm": 0.14270088076591492, "learning_rate": 2.1855828220858896e-05, "loss": 1.100630760192871, "step": 286 }, { "epoch": 0.3521472392638037, "grad_norm": 0.147483691573143, "learning_rate": 2.1932515337423315e-05, "loss": 1.0364069938659668, "step": 287 }, { "epoch": 0.35337423312883437, "grad_norm": 0.11873972415924072, "learning_rate": 2.2009202453987733e-05, "loss": 1.0084702968597412, "step": 288 }, { "epoch": 0.35460122699386504, "grad_norm": 0.1410827338695526, "learning_rate": 2.208588957055215e-05, "loss": 0.9259920120239258, "step": 289 }, { "epoch": 0.3558282208588957, "grad_norm": 0.12482478469610214, "learning_rate": 2.2162576687116564e-05, "loss": 0.8635483384132385, "step": 290 }, { "epoch": 0.3570552147239264, "grad_norm": 0.11416301131248474, "learning_rate": 2.2239263803680982e-05, "loss": 1.1208245754241943, "step": 291 }, { "epoch": 0.35828220858895704, "grad_norm": 0.14102044701576233, "learning_rate": 2.2315950920245397e-05, "loss": 1.0029829740524292, "step": 292 }, { "epoch": 0.3595092024539877, "grad_norm": 0.25360825657844543, "learning_rate": 2.239263803680982e-05, "loss": 1.0258023738861084, "step": 293 }, { "epoch": 0.36073619631901843, "grad_norm": 0.13373959064483643, "learning_rate": 2.2469325153374235e-05, "loss": 1.0896992683410645, "step": 294 }, { "epoch": 0.3619631901840491, "grad_norm": 0.13520075380802155, "learning_rate": 2.2546012269938653e-05, "loss": 1.1489715576171875, "step": 295 }, { "epoch": 0.36319018404907977, "grad_norm": 0.11745908111333847, "learning_rate": 2.262269938650307e-05, "loss": 1.0147686004638672, "step": 296 }, { "epoch": 0.36441717791411044, "grad_norm": 0.12096819281578064, "learning_rate": 2.2699386503067484e-05, "loss": 1.2157928943634033, "step": 297 }, { "epoch": 0.3656441717791411, "grad_norm": 0.12824957072734833, "learning_rate": 2.2776073619631902e-05, "loss": 1.037453532218933, "step": 298 }, { "epoch": 0.36687116564417177, "grad_norm": 0.13326457142829895, "learning_rate": 2.285276073619632e-05, "loss": 0.9710744023323059, "step": 299 }, { "epoch": 0.36809815950920244, "grad_norm": 0.14364396035671234, "learning_rate": 2.292944785276074e-05, "loss": 0.9510235786437988, "step": 300 }, { "epoch": 0.3693251533742331, "grad_norm": 0.12908251583576202, "learning_rate": 2.3006134969325155e-05, "loss": 0.9568293690681458, "step": 301 }, { "epoch": 0.37055214723926383, "grad_norm": 0.11252547055482864, "learning_rate": 2.308282208588957e-05, "loss": 1.0541174411773682, "step": 302 }, { "epoch": 0.3717791411042945, "grad_norm": 0.15145935118198395, "learning_rate": 2.315950920245399e-05, "loss": 1.0205459594726562, "step": 303 }, { "epoch": 0.37300613496932516, "grad_norm": 0.1315487176179886, "learning_rate": 2.3236196319018404e-05, "loss": 0.9583384990692139, "step": 304 }, { "epoch": 0.37423312883435583, "grad_norm": 0.11966493725776672, "learning_rate": 2.3312883435582822e-05, "loss": 1.004375696182251, "step": 305 }, { "epoch": 0.3754601226993865, "grad_norm": 0.13790951669216156, "learning_rate": 2.338957055214724e-05, "loss": 1.0536668300628662, "step": 306 }, { "epoch": 0.37668711656441717, "grad_norm": 0.152229905128479, "learning_rate": 2.346625766871166e-05, "loss": 1.2063534259796143, "step": 307 }, { "epoch": 0.37791411042944784, "grad_norm": 0.14724068343639374, "learning_rate": 2.3542944785276075e-05, "loss": 0.9091154336929321, "step": 308 }, { "epoch": 0.3791411042944785, "grad_norm": 0.12967370450496674, "learning_rate": 2.361963190184049e-05, "loss": 1.135591745376587, "step": 309 }, { "epoch": 0.3803680981595092, "grad_norm": 0.12177694588899612, "learning_rate": 2.369631901840491e-05, "loss": 1.1247773170471191, "step": 310 }, { "epoch": 0.3815950920245399, "grad_norm": 0.15460914373397827, "learning_rate": 2.3773006134969324e-05, "loss": 0.9706491231918335, "step": 311 }, { "epoch": 0.38282208588957056, "grad_norm": 0.13451868295669556, "learning_rate": 2.3849693251533746e-05, "loss": 0.99631667137146, "step": 312 }, { "epoch": 0.38404907975460123, "grad_norm": 0.15093882381916046, "learning_rate": 2.392638036809816e-05, "loss": 0.9145414233207703, "step": 313 }, { "epoch": 0.3852760736196319, "grad_norm": 0.14166617393493652, "learning_rate": 2.400306748466258e-05, "loss": 1.0831983089447021, "step": 314 }, { "epoch": 0.38650306748466257, "grad_norm": 0.14582061767578125, "learning_rate": 2.4079754601226995e-05, "loss": 1.0937273502349854, "step": 315 }, { "epoch": 0.38773006134969323, "grad_norm": 0.11296252906322479, "learning_rate": 2.415644171779141e-05, "loss": 0.9945961236953735, "step": 316 }, { "epoch": 0.3889570552147239, "grad_norm": 0.1398955136537552, "learning_rate": 2.423312883435583e-05, "loss": 1.0526220798492432, "step": 317 }, { "epoch": 0.3901840490797546, "grad_norm": 0.1260657161474228, "learning_rate": 2.4309815950920247e-05, "loss": 1.0592193603515625, "step": 318 }, { "epoch": 0.3914110429447853, "grad_norm": 0.16064973175525665, "learning_rate": 2.4386503067484666e-05, "loss": 1.0037996768951416, "step": 319 }, { "epoch": 0.39263803680981596, "grad_norm": 0.14862963557243347, "learning_rate": 2.446319018404908e-05, "loss": 1.074394941329956, "step": 320 }, { "epoch": 0.39386503067484663, "grad_norm": 0.15531349182128906, "learning_rate": 2.4539877300613496e-05, "loss": 0.9046361446380615, "step": 321 }, { "epoch": 0.3950920245398773, "grad_norm": 0.14921541512012482, "learning_rate": 2.4616564417177915e-05, "loss": 1.0954641103744507, "step": 322 }, { "epoch": 0.39631901840490796, "grad_norm": 0.12223833799362183, "learning_rate": 2.469325153374233e-05, "loss": 0.9601402282714844, "step": 323 }, { "epoch": 0.39754601226993863, "grad_norm": 0.12606622278690338, "learning_rate": 2.4769938650306752e-05, "loss": 1.0986905097961426, "step": 324 }, { "epoch": 0.3987730061349693, "grad_norm": 0.13932563364505768, "learning_rate": 2.4846625766871167e-05, "loss": 0.9535547494888306, "step": 325 }, { "epoch": 0.4, "grad_norm": 0.14261290431022644, "learning_rate": 2.4923312883435586e-05, "loss": 1.0713505744934082, "step": 326 }, { "epoch": 0.4012269938650307, "grad_norm": 0.12862585484981537, "learning_rate": 2.5e-05, "loss": 0.9396833777427673, "step": 327 }, { "epoch": 0.40245398773006136, "grad_norm": 0.13219887018203735, "learning_rate": 2.5076687116564416e-05, "loss": 0.9627938270568848, "step": 328 }, { "epoch": 0.403680981595092, "grad_norm": 0.14276766777038574, "learning_rate": 2.5153374233128835e-05, "loss": 1.0780998468399048, "step": 329 }, { "epoch": 0.4049079754601227, "grad_norm": 0.13761764764785767, "learning_rate": 2.523006134969325e-05, "loss": 0.9467271566390991, "step": 330 }, { "epoch": 0.40613496932515336, "grad_norm": 0.14368735253810883, "learning_rate": 2.530674846625767e-05, "loss": 1.1528966426849365, "step": 331 }, { "epoch": 0.40736196319018403, "grad_norm": 0.13917796313762665, "learning_rate": 2.5383435582822084e-05, "loss": 0.9774558544158936, "step": 332 }, { "epoch": 0.4085889570552147, "grad_norm": 0.13712440431118011, "learning_rate": 2.5460122699386503e-05, "loss": 1.0110121965408325, "step": 333 }, { "epoch": 0.4098159509202454, "grad_norm": 0.14411571621894836, "learning_rate": 2.5536809815950925e-05, "loss": 1.047809362411499, "step": 334 }, { "epoch": 0.4110429447852761, "grad_norm": 0.15931838750839233, "learning_rate": 2.561349693251534e-05, "loss": 1.0592668056488037, "step": 335 }, { "epoch": 0.41226993865030676, "grad_norm": 0.15905073285102844, "learning_rate": 2.569018404907976e-05, "loss": 1.0245099067687988, "step": 336 }, { "epoch": 0.4134969325153374, "grad_norm": 0.3735513985157013, "learning_rate": 2.5766871165644174e-05, "loss": 1.2064604759216309, "step": 337 }, { "epoch": 0.4147239263803681, "grad_norm": 0.12237963825464249, "learning_rate": 2.5843558282208592e-05, "loss": 1.0669598579406738, "step": 338 }, { "epoch": 0.41595092024539876, "grad_norm": 0.13915956020355225, "learning_rate": 2.5920245398773008e-05, "loss": 1.0896600484848022, "step": 339 }, { "epoch": 0.4171779141104294, "grad_norm": 0.15436965227127075, "learning_rate": 2.5996932515337423e-05, "loss": 1.0567042827606201, "step": 340 }, { "epoch": 0.41840490797546015, "grad_norm": 0.14979365468025208, "learning_rate": 2.607361963190184e-05, "loss": 1.0921344757080078, "step": 341 }, { "epoch": 0.4196319018404908, "grad_norm": 0.1607174277305603, "learning_rate": 2.6150306748466257e-05, "loss": 0.9626774787902832, "step": 342 }, { "epoch": 0.4208588957055215, "grad_norm": 0.12983466684818268, "learning_rate": 2.6226993865030675e-05, "loss": 1.0448408126831055, "step": 343 }, { "epoch": 0.42208588957055215, "grad_norm": 0.14138346910476685, "learning_rate": 2.630368098159509e-05, "loss": 1.0418914556503296, "step": 344 }, { "epoch": 0.4233128834355828, "grad_norm": 0.15338674187660217, "learning_rate": 2.638036809815951e-05, "loss": 1.0211224555969238, "step": 345 }, { "epoch": 0.4245398773006135, "grad_norm": 0.16042347252368927, "learning_rate": 2.645705521472393e-05, "loss": 1.1030805110931396, "step": 346 }, { "epoch": 0.42576687116564416, "grad_norm": 0.12567435204982758, "learning_rate": 2.6533742331288346e-05, "loss": 0.9175524711608887, "step": 347 }, { "epoch": 0.4269938650306748, "grad_norm": 0.17399585247039795, "learning_rate": 2.6610429447852765e-05, "loss": 0.9988354444503784, "step": 348 }, { "epoch": 0.42822085889570555, "grad_norm": 0.12889741361141205, "learning_rate": 2.668711656441718e-05, "loss": 1.1514885425567627, "step": 349 }, { "epoch": 0.4294478527607362, "grad_norm": 0.14619231224060059, "learning_rate": 2.67638036809816e-05, "loss": 1.0138335227966309, "step": 350 }, { "epoch": 0.4306748466257669, "grad_norm": 0.14815452694892883, "learning_rate": 2.6840490797546014e-05, "loss": 1.0424927473068237, "step": 351 }, { "epoch": 0.43190184049079755, "grad_norm": 0.18256254494190216, "learning_rate": 2.6917177914110432e-05, "loss": 0.8433379530906677, "step": 352 }, { "epoch": 0.4331288343558282, "grad_norm": 0.13799375295639038, "learning_rate": 2.6993865030674848e-05, "loss": 1.0215107202529907, "step": 353 }, { "epoch": 0.4343558282208589, "grad_norm": 0.14031174778938293, "learning_rate": 2.7070552147239263e-05, "loss": 0.9818652868270874, "step": 354 }, { "epoch": 0.43558282208588955, "grad_norm": 0.16724731028079987, "learning_rate": 2.714723926380368e-05, "loss": 1.0153993368148804, "step": 355 }, { "epoch": 0.4368098159509202, "grad_norm": 0.12236975878477097, "learning_rate": 2.7223926380368097e-05, "loss": 1.173166275024414, "step": 356 }, { "epoch": 0.43803680981595094, "grad_norm": 0.14538972079753876, "learning_rate": 2.7300613496932515e-05, "loss": 1.0236756801605225, "step": 357 }, { "epoch": 0.4392638036809816, "grad_norm": 0.17985936999320984, "learning_rate": 2.737730061349693e-05, "loss": 0.8765881061553955, "step": 358 }, { "epoch": 0.4404907975460123, "grad_norm": 0.13249680399894714, "learning_rate": 2.7453987730061353e-05, "loss": 1.060772180557251, "step": 359 }, { "epoch": 0.44171779141104295, "grad_norm": 0.13048824667930603, "learning_rate": 2.753067484662577e-05, "loss": 1.0392584800720215, "step": 360 }, { "epoch": 0.4429447852760736, "grad_norm": 0.14089339971542358, "learning_rate": 2.7607361963190186e-05, "loss": 1.0105173587799072, "step": 361 }, { "epoch": 0.4441717791411043, "grad_norm": 0.14135374128818512, "learning_rate": 2.7684049079754605e-05, "loss": 0.9565079212188721, "step": 362 }, { "epoch": 0.44539877300613495, "grad_norm": 0.17921406030654907, "learning_rate": 2.776073619631902e-05, "loss": 0.9671577215194702, "step": 363 }, { "epoch": 0.4466257668711656, "grad_norm": 0.16197779774665833, "learning_rate": 2.783742331288344e-05, "loss": 0.9703222513198853, "step": 364 }, { "epoch": 0.44785276073619634, "grad_norm": 0.14247509837150574, "learning_rate": 2.7914110429447854e-05, "loss": 1.0803967714309692, "step": 365 }, { "epoch": 0.449079754601227, "grad_norm": 0.15004371106624603, "learning_rate": 2.799079754601227e-05, "loss": 1.0626829862594604, "step": 366 }, { "epoch": 0.4503067484662577, "grad_norm": 0.14720168709754944, "learning_rate": 2.8067484662576688e-05, "loss": 1.0509107112884521, "step": 367 }, { "epoch": 0.45153374233128835, "grad_norm": 0.17678888142108917, "learning_rate": 2.8144171779141103e-05, "loss": 1.0586212873458862, "step": 368 }, { "epoch": 0.452760736196319, "grad_norm": 0.14187254011631012, "learning_rate": 2.822085889570552e-05, "loss": 0.9871550798416138, "step": 369 }, { "epoch": 0.4539877300613497, "grad_norm": 0.14095336198806763, "learning_rate": 2.8297546012269937e-05, "loss": 0.9352030754089355, "step": 370 }, { "epoch": 0.45521472392638035, "grad_norm": 0.12840639054775238, "learning_rate": 2.837423312883436e-05, "loss": 1.0710327625274658, "step": 371 }, { "epoch": 0.456441717791411, "grad_norm": 0.14072862267494202, "learning_rate": 2.8450920245398777e-05, "loss": 1.1011724472045898, "step": 372 }, { "epoch": 0.45766871165644174, "grad_norm": 0.15143389999866486, "learning_rate": 2.8527607361963193e-05, "loss": 1.126314640045166, "step": 373 }, { "epoch": 0.4588957055214724, "grad_norm": 0.14439180493354797, "learning_rate": 2.860429447852761e-05, "loss": 1.0581159591674805, "step": 374 }, { "epoch": 0.4601226993865031, "grad_norm": 0.1794329136610031, "learning_rate": 2.8680981595092026e-05, "loss": 1.0668919086456299, "step": 375 }, { "epoch": 0.46134969325153374, "grad_norm": 0.1328800767660141, "learning_rate": 2.8757668711656445e-05, "loss": 1.1326098442077637, "step": 376 }, { "epoch": 0.4625766871165644, "grad_norm": 0.15257145464420319, "learning_rate": 2.883435582822086e-05, "loss": 1.0535569190979004, "step": 377 }, { "epoch": 0.4638036809815951, "grad_norm": 0.17674346268177032, "learning_rate": 2.8911042944785276e-05, "loss": 0.9484614729881287, "step": 378 }, { "epoch": 0.46503067484662575, "grad_norm": 0.20298261940479279, "learning_rate": 2.8987730061349694e-05, "loss": 1.0386786460876465, "step": 379 }, { "epoch": 0.4662576687116564, "grad_norm": 0.1691139191389084, "learning_rate": 2.906441717791411e-05, "loss": 0.9482850432395935, "step": 380 }, { "epoch": 0.46748466257668714, "grad_norm": 0.17065995931625366, "learning_rate": 2.9141104294478528e-05, "loss": 0.9740985631942749, "step": 381 }, { "epoch": 0.4687116564417178, "grad_norm": 0.2869400978088379, "learning_rate": 2.9217791411042943e-05, "loss": 0.8781532049179077, "step": 382 }, { "epoch": 0.4699386503067485, "grad_norm": 0.16009368002414703, "learning_rate": 2.9294478527607362e-05, "loss": 0.9571852684020996, "step": 383 }, { "epoch": 0.47116564417177914, "grad_norm": 0.16674485802650452, "learning_rate": 2.9371165644171784e-05, "loss": 0.9591301679611206, "step": 384 }, { "epoch": 0.4723926380368098, "grad_norm": 0.18746590614318848, "learning_rate": 2.94478527607362e-05, "loss": 1.0060620307922363, "step": 385 }, { "epoch": 0.4736196319018405, "grad_norm": 0.2075481116771698, "learning_rate": 2.9524539877300618e-05, "loss": 0.7908092141151428, "step": 386 }, { "epoch": 0.47484662576687114, "grad_norm": 0.18013356626033783, "learning_rate": 2.9601226993865033e-05, "loss": 0.9635549187660217, "step": 387 }, { "epoch": 0.47607361963190187, "grad_norm": 0.1646333932876587, "learning_rate": 2.967791411042945e-05, "loss": 0.8931646347045898, "step": 388 }, { "epoch": 0.47730061349693254, "grad_norm": 0.16895435750484467, "learning_rate": 2.9754601226993867e-05, "loss": 1.0601506233215332, "step": 389 }, { "epoch": 0.4785276073619632, "grad_norm": 0.16723361611366272, "learning_rate": 2.9831288343558282e-05, "loss": 0.7506167888641357, "step": 390 }, { "epoch": 0.47975460122699387, "grad_norm": 0.16767138242721558, "learning_rate": 2.99079754601227e-05, "loss": 1.0490965843200684, "step": 391 }, { "epoch": 0.48098159509202454, "grad_norm": 0.17676138877868652, "learning_rate": 2.9984662576687116e-05, "loss": 0.9926307201385498, "step": 392 }, { "epoch": 0.4822085889570552, "grad_norm": 0.170121967792511, "learning_rate": 3.0061349693251534e-05, "loss": 0.972074031829834, "step": 393 }, { "epoch": 0.4834355828220859, "grad_norm": 0.1656096875667572, "learning_rate": 3.013803680981595e-05, "loss": 1.0180227756500244, "step": 394 }, { "epoch": 0.48466257668711654, "grad_norm": 0.15439537167549133, "learning_rate": 3.0214723926380368e-05, "loss": 0.962357223033905, "step": 395 }, { "epoch": 0.48588957055214727, "grad_norm": 0.1478506177663803, "learning_rate": 3.029141104294479e-05, "loss": 1.064382553100586, "step": 396 }, { "epoch": 0.48711656441717793, "grad_norm": 0.18902139365673065, "learning_rate": 3.0368098159509205e-05, "loss": 0.8336688280105591, "step": 397 }, { "epoch": 0.4883435582822086, "grad_norm": 0.1862400621175766, "learning_rate": 3.0444785276073624e-05, "loss": 0.9465341567993164, "step": 398 }, { "epoch": 0.48957055214723927, "grad_norm": 0.17175157368183136, "learning_rate": 3.052147239263804e-05, "loss": 1.0858125686645508, "step": 399 }, { "epoch": 0.49079754601226994, "grad_norm": 0.16732214391231537, "learning_rate": 3.059815950920246e-05, "loss": 0.961592435836792, "step": 400 }, { "epoch": 0.4920245398773006, "grad_norm": 0.16292540729045868, "learning_rate": 3.067484662576687e-05, "loss": 1.1122252941131592, "step": 401 }, { "epoch": 0.49325153374233127, "grad_norm": 0.14255374670028687, "learning_rate": 3.075153374233129e-05, "loss": 1.0646617412567139, "step": 402 }, { "epoch": 0.49447852760736194, "grad_norm": 0.1584572046995163, "learning_rate": 3.0828220858895703e-05, "loss": 0.9858182668685913, "step": 403 }, { "epoch": 0.49570552147239266, "grad_norm": 0.1792004555463791, "learning_rate": 3.0904907975460125e-05, "loss": 0.7464679479598999, "step": 404 }, { "epoch": 0.49693251533742333, "grad_norm": 0.1698596328496933, "learning_rate": 3.098159509202454e-05, "loss": 0.8794978857040405, "step": 405 }, { "epoch": 0.498159509202454, "grad_norm": 0.16383381187915802, "learning_rate": 3.1058282208588956e-05, "loss": 0.883232593536377, "step": 406 }, { "epoch": 0.49938650306748467, "grad_norm": 0.19811704754829407, "learning_rate": 3.113496932515337e-05, "loss": 1.1477901935577393, "step": 407 }, { "epoch": 0.5006134969325153, "grad_norm": 0.16155482828617096, "learning_rate": 3.121165644171779e-05, "loss": 0.9912044405937195, "step": 408 }, { "epoch": 0.501840490797546, "grad_norm": 0.14289511740207672, "learning_rate": 3.1288343558282215e-05, "loss": 1.0480196475982666, "step": 409 }, { "epoch": 0.5030674846625767, "grad_norm": 0.15720532834529877, "learning_rate": 3.136503067484663e-05, "loss": 0.9475855827331543, "step": 410 }, { "epoch": 0.5042944785276073, "grad_norm": 0.14176304638385773, "learning_rate": 3.1441717791411045e-05, "loss": 0.9789233207702637, "step": 411 }, { "epoch": 0.505521472392638, "grad_norm": 0.18856161832809448, "learning_rate": 3.151840490797546e-05, "loss": 0.9706493616104126, "step": 412 }, { "epoch": 0.5067484662576687, "grad_norm": 0.18467755615711212, "learning_rate": 3.159509202453988e-05, "loss": 1.1335866451263428, "step": 413 }, { "epoch": 0.5079754601226993, "grad_norm": 0.16826774179935455, "learning_rate": 3.16717791411043e-05, "loss": 0.9272061586380005, "step": 414 }, { "epoch": 0.50920245398773, "grad_norm": 0.1806141585111618, "learning_rate": 3.174846625766871e-05, "loss": 0.8536734580993652, "step": 415 }, { "epoch": 0.5104294478527608, "grad_norm": 0.18542692065238953, "learning_rate": 3.182515337423313e-05, "loss": 1.0285747051239014, "step": 416 }, { "epoch": 0.5116564417177915, "grad_norm": 0.13729947805404663, "learning_rate": 3.1901840490797544e-05, "loss": 1.0050827264785767, "step": 417 }, { "epoch": 0.5128834355828221, "grad_norm": 0.140880286693573, "learning_rate": 3.1978527607361966e-05, "loss": 1.0673481225967407, "step": 418 }, { "epoch": 0.5141104294478528, "grad_norm": 0.1483444720506668, "learning_rate": 3.205521472392638e-05, "loss": 1.0665265321731567, "step": 419 }, { "epoch": 0.5153374233128835, "grad_norm": 0.17398011684417725, "learning_rate": 3.2131901840490796e-05, "loss": 0.9840133190155029, "step": 420 }, { "epoch": 0.5165644171779141, "grad_norm": 0.18812794983386993, "learning_rate": 3.220858895705521e-05, "loss": 1.0003412961959839, "step": 421 }, { "epoch": 0.5177914110429448, "grad_norm": 0.14922228455543518, "learning_rate": 3.228527607361963e-05, "loss": 1.0319355726242065, "step": 422 }, { "epoch": 0.5190184049079755, "grad_norm": 0.16250623762607574, "learning_rate": 3.2361963190184055e-05, "loss": 0.9891963005065918, "step": 423 }, { "epoch": 0.5202453987730061, "grad_norm": 0.15244553983211517, "learning_rate": 3.243865030674847e-05, "loss": 0.9959155917167664, "step": 424 }, { "epoch": 0.5214723926380368, "grad_norm": 0.18644729256629944, "learning_rate": 3.2515337423312886e-05, "loss": 1.1361573934555054, "step": 425 }, { "epoch": 0.5226993865030675, "grad_norm": 0.19638444483280182, "learning_rate": 3.25920245398773e-05, "loss": 0.8184643983840942, "step": 426 }, { "epoch": 0.5239263803680981, "grad_norm": 0.21091853082180023, "learning_rate": 3.266871165644172e-05, "loss": 1.0344023704528809, "step": 427 }, { "epoch": 0.5251533742331288, "grad_norm": 0.15979039669036865, "learning_rate": 3.274539877300614e-05, "loss": 1.1045410633087158, "step": 428 }, { "epoch": 0.5263803680981595, "grad_norm": 0.13813886046409607, "learning_rate": 3.282208588957055e-05, "loss": 1.0541703701019287, "step": 429 }, { "epoch": 0.5276073619631901, "grad_norm": 0.1610398292541504, "learning_rate": 3.289877300613497e-05, "loss": 0.9751132726669312, "step": 430 }, { "epoch": 0.5288343558282208, "grad_norm": 0.21102315187454224, "learning_rate": 3.2975460122699384e-05, "loss": 0.8938742876052856, "step": 431 }, { "epoch": 0.5300613496932516, "grad_norm": 0.14479491114616394, "learning_rate": 3.3052147239263806e-05, "loss": 1.0795782804489136, "step": 432 }, { "epoch": 0.5312883435582823, "grad_norm": 0.1823873519897461, "learning_rate": 3.312883435582822e-05, "loss": 0.9632036685943604, "step": 433 }, { "epoch": 0.5325153374233129, "grad_norm": 0.1563059389591217, "learning_rate": 3.320552147239264e-05, "loss": 0.8528692722320557, "step": 434 }, { "epoch": 0.5337423312883436, "grad_norm": 0.15667127072811127, "learning_rate": 3.328220858895706e-05, "loss": 1.1205227375030518, "step": 435 }, { "epoch": 0.5349693251533743, "grad_norm": 0.16287066042423248, "learning_rate": 3.335889570552147e-05, "loss": 0.949151337146759, "step": 436 }, { "epoch": 0.5361963190184049, "grad_norm": 0.13826237618923187, "learning_rate": 3.3435582822085895e-05, "loss": 0.9871480464935303, "step": 437 }, { "epoch": 0.5374233128834356, "grad_norm": 0.2004631906747818, "learning_rate": 3.351226993865031e-05, "loss": 1.033210039138794, "step": 438 }, { "epoch": 0.5386503067484663, "grad_norm": 0.15636901557445526, "learning_rate": 3.3588957055214726e-05, "loss": 0.9971957802772522, "step": 439 }, { "epoch": 0.5398773006134969, "grad_norm": 0.14159618318080902, "learning_rate": 3.366564417177914e-05, "loss": 1.1186002492904663, "step": 440 }, { "epoch": 0.5411042944785276, "grad_norm": 0.20502841472625732, "learning_rate": 3.3742331288343556e-05, "loss": 1.095931053161621, "step": 441 }, { "epoch": 0.5423312883435583, "grad_norm": 0.16007456183433533, "learning_rate": 3.381901840490798e-05, "loss": 1.0931950807571411, "step": 442 }, { "epoch": 0.5435582822085889, "grad_norm": 0.17049135267734528, "learning_rate": 3.3895705521472393e-05, "loss": 0.9500782489776611, "step": 443 }, { "epoch": 0.5447852760736196, "grad_norm": 0.15217125415802002, "learning_rate": 3.397239263803681e-05, "loss": 1.0284218788146973, "step": 444 }, { "epoch": 0.5460122699386503, "grad_norm": 0.19467036426067352, "learning_rate": 3.4049079754601224e-05, "loss": 0.8528940677642822, "step": 445 }, { "epoch": 0.5472392638036809, "grad_norm": 0.1689079999923706, "learning_rate": 3.4125766871165646e-05, "loss": 1.0247323513031006, "step": 446 }, { "epoch": 0.5484662576687117, "grad_norm": 0.15952327847480774, "learning_rate": 3.420245398773007e-05, "loss": 1.0116055011749268, "step": 447 }, { "epoch": 0.5496932515337424, "grad_norm": 0.17348895967006683, "learning_rate": 3.427914110429448e-05, "loss": 1.0074721574783325, "step": 448 }, { "epoch": 0.550920245398773, "grad_norm": 0.16526681184768677, "learning_rate": 3.43558282208589e-05, "loss": 0.9616392254829407, "step": 449 }, { "epoch": 0.5521472392638037, "grad_norm": 0.17381709814071655, "learning_rate": 3.4432515337423313e-05, "loss": 0.9868429899215698, "step": 450 }, { "epoch": 0.5533742331288344, "grad_norm": 0.1362922191619873, "learning_rate": 3.4509202453987735e-05, "loss": 1.1241936683654785, "step": 451 }, { "epoch": 0.554601226993865, "grad_norm": 0.1481960117816925, "learning_rate": 3.458588957055215e-05, "loss": 1.012056827545166, "step": 452 }, { "epoch": 0.5558282208588957, "grad_norm": 0.18026714026927948, "learning_rate": 3.4662576687116566e-05, "loss": 1.1275293827056885, "step": 453 }, { "epoch": 0.5570552147239264, "grad_norm": 0.17534935474395752, "learning_rate": 3.473926380368098e-05, "loss": 1.0973937511444092, "step": 454 }, { "epoch": 0.558282208588957, "grad_norm": 0.15328676998615265, "learning_rate": 3.4815950920245396e-05, "loss": 1.063830852508545, "step": 455 }, { "epoch": 0.5595092024539877, "grad_norm": 0.17416295409202576, "learning_rate": 3.489263803680982e-05, "loss": 0.9323302507400513, "step": 456 }, { "epoch": 0.5607361963190184, "grad_norm": 0.1527651697397232, "learning_rate": 3.4969325153374234e-05, "loss": 0.994120180606842, "step": 457 }, { "epoch": 0.561963190184049, "grad_norm": 0.18227113783359528, "learning_rate": 3.504601226993865e-05, "loss": 1.0911952257156372, "step": 458 }, { "epoch": 0.5631901840490797, "grad_norm": 0.16437362134456635, "learning_rate": 3.512269938650307e-05, "loss": 1.0941689014434814, "step": 459 }, { "epoch": 0.5644171779141104, "grad_norm": 0.1831217259168625, "learning_rate": 3.5199386503067486e-05, "loss": 0.9666903614997864, "step": 460 }, { "epoch": 0.5656441717791411, "grad_norm": 0.17628070712089539, "learning_rate": 3.527607361963191e-05, "loss": 0.9058413505554199, "step": 461 }, { "epoch": 0.5668711656441717, "grad_norm": 0.1623339205980301, "learning_rate": 3.535276073619632e-05, "loss": 0.9201177954673767, "step": 462 }, { "epoch": 0.5680981595092025, "grad_norm": 0.21021446585655212, "learning_rate": 3.542944785276074e-05, "loss": 0.8776906728744507, "step": 463 }, { "epoch": 0.5693251533742332, "grad_norm": 0.1577741503715515, "learning_rate": 3.5506134969325154e-05, "loss": 1.0768144130706787, "step": 464 }, { "epoch": 0.5705521472392638, "grad_norm": 0.17327581346035004, "learning_rate": 3.558282208588957e-05, "loss": 1.0555448532104492, "step": 465 }, { "epoch": 0.5717791411042945, "grad_norm": 0.15420399606227875, "learning_rate": 3.565950920245399e-05, "loss": 0.9727882742881775, "step": 466 }, { "epoch": 0.5730061349693252, "grad_norm": 0.14699219167232513, "learning_rate": 3.5736196319018406e-05, "loss": 1.143357276916504, "step": 467 }, { "epoch": 0.5742331288343558, "grad_norm": 0.15191036462783813, "learning_rate": 3.581288343558282e-05, "loss": 1.076880931854248, "step": 468 }, { "epoch": 0.5754601226993865, "grad_norm": 0.16430675983428955, "learning_rate": 3.5889570552147236e-05, "loss": 0.945013701915741, "step": 469 }, { "epoch": 0.5766871165644172, "grad_norm": 0.2608495354652405, "learning_rate": 3.596625766871166e-05, "loss": 0.9734224081039429, "step": 470 }, { "epoch": 0.5779141104294478, "grad_norm": 0.1591901183128357, "learning_rate": 3.6042944785276074e-05, "loss": 1.020780086517334, "step": 471 }, { "epoch": 0.5791411042944785, "grad_norm": 0.16627655923366547, "learning_rate": 3.6119631901840496e-05, "loss": 0.9145815968513489, "step": 472 }, { "epoch": 0.5803680981595092, "grad_norm": 0.1900457888841629, "learning_rate": 3.619631901840491e-05, "loss": 1.2936384677886963, "step": 473 }, { "epoch": 0.5815950920245399, "grad_norm": 0.15158432722091675, "learning_rate": 3.6273006134969326e-05, "loss": 1.0011953115463257, "step": 474 }, { "epoch": 0.5828220858895705, "grad_norm": 0.1826300024986267, "learning_rate": 3.634969325153375e-05, "loss": 1.008684754371643, "step": 475 }, { "epoch": 0.5840490797546012, "grad_norm": 0.23823684453964233, "learning_rate": 3.642638036809816e-05, "loss": 0.9670805931091309, "step": 476 }, { "epoch": 0.5852760736196319, "grad_norm": 0.18894422054290771, "learning_rate": 3.650306748466258e-05, "loss": 1.031349778175354, "step": 477 }, { "epoch": 0.5865030674846625, "grad_norm": 0.15077471733093262, "learning_rate": 3.6579754601226994e-05, "loss": 1.1049787998199463, "step": 478 }, { "epoch": 0.5877300613496933, "grad_norm": 0.1775146722793579, "learning_rate": 3.665644171779141e-05, "loss": 1.0926228761672974, "step": 479 }, { "epoch": 0.588957055214724, "grad_norm": 0.15029749274253845, "learning_rate": 3.673312883435583e-05, "loss": 1.0261876583099365, "step": 480 }, { "epoch": 0.5901840490797546, "grad_norm": 0.17414364218711853, "learning_rate": 3.6809815950920246e-05, "loss": 0.9804202318191528, "step": 481 }, { "epoch": 0.5914110429447853, "grad_norm": 0.18401874601840973, "learning_rate": 3.688650306748466e-05, "loss": 1.0506110191345215, "step": 482 }, { "epoch": 0.592638036809816, "grad_norm": 0.1374252587556839, "learning_rate": 3.696319018404908e-05, "loss": 1.019785761833191, "step": 483 }, { "epoch": 0.5938650306748466, "grad_norm": 0.17062249779701233, "learning_rate": 3.70398773006135e-05, "loss": 0.9169716835021973, "step": 484 }, { "epoch": 0.5950920245398773, "grad_norm": 0.149411141872406, "learning_rate": 3.711656441717792e-05, "loss": 0.967678427696228, "step": 485 }, { "epoch": 0.596319018404908, "grad_norm": 0.1812339872121811, "learning_rate": 3.7193251533742336e-05, "loss": 1.0318129062652588, "step": 486 }, { "epoch": 0.5975460122699386, "grad_norm": 0.1578977406024933, "learning_rate": 3.726993865030675e-05, "loss": 1.075181484222412, "step": 487 }, { "epoch": 0.5987730061349693, "grad_norm": 0.17941473424434662, "learning_rate": 3.7346625766871166e-05, "loss": 0.9480218887329102, "step": 488 }, { "epoch": 0.6, "grad_norm": 0.1542184054851532, "learning_rate": 3.742331288343559e-05, "loss": 1.0697810649871826, "step": 489 }, { "epoch": 0.6012269938650306, "grad_norm": 0.1600131392478943, "learning_rate": 3.7500000000000003e-05, "loss": 0.9158498048782349, "step": 490 }, { "epoch": 0.6024539877300613, "grad_norm": 0.17138931155204773, "learning_rate": 3.757668711656442e-05, "loss": 0.933313250541687, "step": 491 }, { "epoch": 0.603680981595092, "grad_norm": 0.18892382085323334, "learning_rate": 3.7653374233128834e-05, "loss": 0.9408615231513977, "step": 492 }, { "epoch": 0.6049079754601226, "grad_norm": 0.1741059273481369, "learning_rate": 3.773006134969325e-05, "loss": 0.9390970468521118, "step": 493 }, { "epoch": 0.6061349693251534, "grad_norm": 0.19972698390483856, "learning_rate": 3.780674846625767e-05, "loss": 0.9239737391471863, "step": 494 }, { "epoch": 0.6073619631901841, "grad_norm": 0.1774204820394516, "learning_rate": 3.7883435582822086e-05, "loss": 0.8995852470397949, "step": 495 }, { "epoch": 0.6085889570552148, "grad_norm": 0.1831020712852478, "learning_rate": 3.79601226993865e-05, "loss": 0.8956073522567749, "step": 496 }, { "epoch": 0.6098159509202454, "grad_norm": 0.1630912572145462, "learning_rate": 3.8036809815950924e-05, "loss": 0.9340415596961975, "step": 497 }, { "epoch": 0.6110429447852761, "grad_norm": 0.17053760588169098, "learning_rate": 3.811349693251534e-05, "loss": 1.0308623313903809, "step": 498 }, { "epoch": 0.6122699386503068, "grad_norm": 0.16596661508083344, "learning_rate": 3.819018404907976e-05, "loss": 1.0936815738677979, "step": 499 }, { "epoch": 0.6134969325153374, "grad_norm": 0.1950528472661972, "learning_rate": 3.8266871165644176e-05, "loss": 1.0250592231750488, "step": 500 }, { "epoch": 0.6147239263803681, "grad_norm": 0.20499587059020996, "learning_rate": 3.834355828220859e-05, "loss": 0.9192500114440918, "step": 501 }, { "epoch": 0.6159509202453988, "grad_norm": 0.15862593054771423, "learning_rate": 3.8420245398773006e-05, "loss": 1.0153205394744873, "step": 502 }, { "epoch": 0.6171779141104294, "grad_norm": 0.15574924647808075, "learning_rate": 3.849693251533742e-05, "loss": 1.034704566001892, "step": 503 }, { "epoch": 0.6184049079754601, "grad_norm": 0.15021435916423798, "learning_rate": 3.8573619631901844e-05, "loss": 0.8727020025253296, "step": 504 }, { "epoch": 0.6196319018404908, "grad_norm": 0.13106480240821838, "learning_rate": 3.865030674846626e-05, "loss": 1.0534900426864624, "step": 505 }, { "epoch": 0.6208588957055214, "grad_norm": 0.22368694841861725, "learning_rate": 3.8726993865030674e-05, "loss": 1.0694952011108398, "step": 506 }, { "epoch": 0.6220858895705521, "grad_norm": 0.17843125760555267, "learning_rate": 3.880368098159509e-05, "loss": 0.8841264247894287, "step": 507 }, { "epoch": 0.6233128834355828, "grad_norm": 0.20412850379943848, "learning_rate": 3.888036809815951e-05, "loss": 0.9213881492614746, "step": 508 }, { "epoch": 0.6245398773006134, "grad_norm": 0.16681833565235138, "learning_rate": 3.895705521472393e-05, "loss": 0.9441587328910828, "step": 509 }, { "epoch": 0.6257668711656442, "grad_norm": 0.18575482070446014, "learning_rate": 3.903374233128835e-05, "loss": 1.0887091159820557, "step": 510 }, { "epoch": 0.6269938650306749, "grad_norm": 0.1693565994501114, "learning_rate": 3.9110429447852764e-05, "loss": 0.919905960559845, "step": 511 }, { "epoch": 0.6282208588957056, "grad_norm": 0.17014120519161224, "learning_rate": 3.918711656441718e-05, "loss": 0.8990117907524109, "step": 512 }, { "epoch": 0.6294478527607362, "grad_norm": 0.2139388918876648, "learning_rate": 3.92638036809816e-05, "loss": 0.8704589605331421, "step": 513 }, { "epoch": 0.6306748466257669, "grad_norm": 0.20340496301651, "learning_rate": 3.9340490797546016e-05, "loss": 0.9490032196044922, "step": 514 }, { "epoch": 0.6319018404907976, "grad_norm": 0.14138160645961761, "learning_rate": 3.941717791411043e-05, "loss": 1.053372859954834, "step": 515 }, { "epoch": 0.6331288343558282, "grad_norm": 0.14066778123378754, "learning_rate": 3.9493865030674847e-05, "loss": 0.9890807867050171, "step": 516 }, { "epoch": 0.6343558282208589, "grad_norm": 0.15723535418510437, "learning_rate": 3.957055214723926e-05, "loss": 1.0118193626403809, "step": 517 }, { "epoch": 0.6355828220858896, "grad_norm": 0.15299800038337708, "learning_rate": 3.9647239263803684e-05, "loss": 0.8512934446334839, "step": 518 }, { "epoch": 0.6368098159509202, "grad_norm": 0.17575867474079132, "learning_rate": 3.97239263803681e-05, "loss": 0.9099797606468201, "step": 519 }, { "epoch": 0.6380368098159509, "grad_norm": 0.1968258172273636, "learning_rate": 3.9800613496932514e-05, "loss": 0.9136043787002563, "step": 520 }, { "epoch": 0.6392638036809816, "grad_norm": 0.18685537576675415, "learning_rate": 3.987730061349693e-05, "loss": 0.9329707622528076, "step": 521 }, { "epoch": 0.6404907975460122, "grad_norm": 0.18521611392498016, "learning_rate": 3.995398773006135e-05, "loss": 0.9958368539810181, "step": 522 }, { "epoch": 0.6417177914110429, "grad_norm": 0.1821265071630478, "learning_rate": 4.0030674846625773e-05, "loss": 1.0319890975952148, "step": 523 }, { "epoch": 0.6429447852760736, "grad_norm": 0.1748528778553009, "learning_rate": 4.010736196319019e-05, "loss": 0.9566287994384766, "step": 524 }, { "epoch": 0.6441717791411042, "grad_norm": 0.1833362579345703, "learning_rate": 4.0184049079754604e-05, "loss": 0.9451923370361328, "step": 525 }, { "epoch": 0.645398773006135, "grad_norm": 0.19453318417072296, "learning_rate": 4.026073619631902e-05, "loss": 1.0098438262939453, "step": 526 }, { "epoch": 0.6466257668711657, "grad_norm": 0.14237500727176666, "learning_rate": 4.033742331288344e-05, "loss": 1.054182767868042, "step": 527 }, { "epoch": 0.6478527607361964, "grad_norm": 0.1462438553571701, "learning_rate": 4.0414110429447856e-05, "loss": 0.9429514408111572, "step": 528 }, { "epoch": 0.649079754601227, "grad_norm": 0.15820056200027466, "learning_rate": 4.049079754601227e-05, "loss": 1.02299165725708, "step": 529 }, { "epoch": 0.6503067484662577, "grad_norm": 0.17923173308372498, "learning_rate": 4.056748466257669e-05, "loss": 0.9489091634750366, "step": 530 }, { "epoch": 0.6515337423312884, "grad_norm": 0.15117336809635162, "learning_rate": 4.06441717791411e-05, "loss": 1.0367777347564697, "step": 531 }, { "epoch": 0.652760736196319, "grad_norm": 0.14106644690036774, "learning_rate": 4.0720858895705524e-05, "loss": 0.9743503928184509, "step": 532 }, { "epoch": 0.6539877300613497, "grad_norm": 0.22310975193977356, "learning_rate": 4.079754601226994e-05, "loss": 0.9525736570358276, "step": 533 }, { "epoch": 0.6552147239263804, "grad_norm": 0.20235635340213776, "learning_rate": 4.087423312883436e-05, "loss": 1.1036714315414429, "step": 534 }, { "epoch": 0.656441717791411, "grad_norm": 0.20213042199611664, "learning_rate": 4.0950920245398776e-05, "loss": 0.9143022298812866, "step": 535 }, { "epoch": 0.6576687116564417, "grad_norm": 0.14894500374794006, "learning_rate": 4.102760736196319e-05, "loss": 1.070515513420105, "step": 536 }, { "epoch": 0.6588957055214724, "grad_norm": 0.20050230622291565, "learning_rate": 4.1104294478527614e-05, "loss": 0.8505071401596069, "step": 537 }, { "epoch": 0.660122699386503, "grad_norm": 0.192686527967453, "learning_rate": 4.118098159509203e-05, "loss": 1.0064101219177246, "step": 538 }, { "epoch": 0.6613496932515337, "grad_norm": 0.18348902463912964, "learning_rate": 4.1257668711656444e-05, "loss": 0.8274821043014526, "step": 539 }, { "epoch": 0.6625766871165644, "grad_norm": 0.1622048169374466, "learning_rate": 4.133435582822086e-05, "loss": 0.9108134508132935, "step": 540 }, { "epoch": 0.6638036809815951, "grad_norm": 0.21410737931728363, "learning_rate": 4.1411042944785274e-05, "loss": 1.0238808393478394, "step": 541 }, { "epoch": 0.6650306748466258, "grad_norm": 0.23994791507720947, "learning_rate": 4.1487730061349696e-05, "loss": 0.9783960580825806, "step": 542 }, { "epoch": 0.6662576687116565, "grad_norm": 0.1734274923801422, "learning_rate": 4.156441717791411e-05, "loss": 1.1298832893371582, "step": 543 }, { "epoch": 0.6674846625766871, "grad_norm": 0.2107182890176773, "learning_rate": 4.164110429447853e-05, "loss": 0.9766179323196411, "step": 544 }, { "epoch": 0.6687116564417178, "grad_norm": 0.20479802787303925, "learning_rate": 4.171779141104294e-05, "loss": 1.0351991653442383, "step": 545 }, { "epoch": 0.6699386503067485, "grad_norm": 0.20026709139347076, "learning_rate": 4.1794478527607364e-05, "loss": 0.9548765420913696, "step": 546 }, { "epoch": 0.6711656441717792, "grad_norm": 0.22591058909893036, "learning_rate": 4.1871165644171786e-05, "loss": 1.0410549640655518, "step": 547 }, { "epoch": 0.6723926380368098, "grad_norm": 0.18926258385181427, "learning_rate": 4.19478527607362e-05, "loss": 1.085874319076538, "step": 548 }, { "epoch": 0.6736196319018405, "grad_norm": 0.18537980318069458, "learning_rate": 4.2024539877300617e-05, "loss": 0.8486774563789368, "step": 549 }, { "epoch": 0.6748466257668712, "grad_norm": 0.1650088131427765, "learning_rate": 4.210122699386503e-05, "loss": 1.0503406524658203, "step": 550 }, { "epoch": 0.6760736196319018, "grad_norm": 0.19402043521404266, "learning_rate": 4.2177914110429454e-05, "loss": 0.823405385017395, "step": 551 }, { "epoch": 0.6773006134969325, "grad_norm": 0.1993020921945572, "learning_rate": 4.225460122699387e-05, "loss": 0.9638257622718811, "step": 552 }, { "epoch": 0.6785276073619632, "grad_norm": 0.14924974739551544, "learning_rate": 4.2331288343558284e-05, "loss": 1.0932658910751343, "step": 553 }, { "epoch": 0.6797546012269938, "grad_norm": 0.17289498448371887, "learning_rate": 4.24079754601227e-05, "loss": 0.8211942911148071, "step": 554 }, { "epoch": 0.6809815950920245, "grad_norm": 0.22855103015899658, "learning_rate": 4.2484662576687115e-05, "loss": 0.8961268663406372, "step": 555 }, { "epoch": 0.6822085889570552, "grad_norm": 0.16937513649463654, "learning_rate": 4.2561349693251537e-05, "loss": 0.9764866232872009, "step": 556 }, { "epoch": 0.6834355828220859, "grad_norm": 0.1677192598581314, "learning_rate": 4.263803680981595e-05, "loss": 1.0665500164031982, "step": 557 }, { "epoch": 0.6846625766871166, "grad_norm": 0.15220429003238678, "learning_rate": 4.271472392638037e-05, "loss": 1.0678634643554688, "step": 558 }, { "epoch": 0.6858895705521473, "grad_norm": 0.16780544817447662, "learning_rate": 4.279141104294479e-05, "loss": 1.0632802248001099, "step": 559 }, { "epoch": 0.6871165644171779, "grad_norm": 0.18702031672000885, "learning_rate": 4.2868098159509204e-05, "loss": 0.9599883556365967, "step": 560 }, { "epoch": 0.6883435582822086, "grad_norm": 0.25118646025657654, "learning_rate": 4.2944785276073626e-05, "loss": 0.900016188621521, "step": 561 }, { "epoch": 0.6895705521472393, "grad_norm": 0.17110291123390198, "learning_rate": 4.302147239263804e-05, "loss": 0.8138965368270874, "step": 562 }, { "epoch": 0.69079754601227, "grad_norm": 0.17040520906448364, "learning_rate": 4.309815950920246e-05, "loss": 0.7933363914489746, "step": 563 }, { "epoch": 0.6920245398773006, "grad_norm": 0.18115730583667755, "learning_rate": 4.317484662576687e-05, "loss": 1.105626106262207, "step": 564 }, { "epoch": 0.6932515337423313, "grad_norm": 0.17138099670410156, "learning_rate": 4.3251533742331294e-05, "loss": 0.9096375703811646, "step": 565 }, { "epoch": 0.694478527607362, "grad_norm": 0.17400647699832916, "learning_rate": 4.332822085889571e-05, "loss": 0.9662379026412964, "step": 566 }, { "epoch": 0.6957055214723926, "grad_norm": 0.18994121253490448, "learning_rate": 4.3404907975460124e-05, "loss": 0.9683457612991333, "step": 567 }, { "epoch": 0.6969325153374233, "grad_norm": 0.18459174036979675, "learning_rate": 4.348159509202454e-05, "loss": 0.9209092855453491, "step": 568 }, { "epoch": 0.698159509202454, "grad_norm": 0.21148541569709778, "learning_rate": 4.3558282208588955e-05, "loss": 0.8938145637512207, "step": 569 }, { "epoch": 0.6993865030674846, "grad_norm": 0.15604908764362335, "learning_rate": 4.363496932515338e-05, "loss": 0.9894592761993408, "step": 570 }, { "epoch": 0.7006134969325153, "grad_norm": 0.18302638828754425, "learning_rate": 4.371165644171779e-05, "loss": 1.0435434579849243, "step": 571 }, { "epoch": 0.701840490797546, "grad_norm": 0.21112249791622162, "learning_rate": 4.3788343558282214e-05, "loss": 0.929439902305603, "step": 572 }, { "epoch": 0.7030674846625767, "grad_norm": 0.1755427122116089, "learning_rate": 4.386503067484663e-05, "loss": 1.0110814571380615, "step": 573 }, { "epoch": 0.7042944785276074, "grad_norm": 0.2175496369600296, "learning_rate": 4.3941717791411044e-05, "loss": 0.9076226353645325, "step": 574 }, { "epoch": 0.7055214723926381, "grad_norm": 0.15220367908477783, "learning_rate": 4.4018404907975466e-05, "loss": 1.0109628438949585, "step": 575 }, { "epoch": 0.7067484662576687, "grad_norm": 0.17558680474758148, "learning_rate": 4.409509202453988e-05, "loss": 1.1173778772354126, "step": 576 }, { "epoch": 0.7079754601226994, "grad_norm": 0.21536490321159363, "learning_rate": 4.41717791411043e-05, "loss": 0.9092915654182434, "step": 577 }, { "epoch": 0.7092024539877301, "grad_norm": 0.1748882532119751, "learning_rate": 4.424846625766871e-05, "loss": 0.9788400530815125, "step": 578 }, { "epoch": 0.7104294478527607, "grad_norm": 0.17524121701717377, "learning_rate": 4.432515337423313e-05, "loss": 0.9155781269073486, "step": 579 }, { "epoch": 0.7116564417177914, "grad_norm": 0.17583546042442322, "learning_rate": 4.440184049079755e-05, "loss": 0.9135163426399231, "step": 580 }, { "epoch": 0.7128834355828221, "grad_norm": 0.16192376613616943, "learning_rate": 4.4478527607361964e-05, "loss": 0.9080080986022949, "step": 581 }, { "epoch": 0.7141104294478527, "grad_norm": 0.2026655673980713, "learning_rate": 4.455521472392638e-05, "loss": 0.9623035192489624, "step": 582 }, { "epoch": 0.7153374233128834, "grad_norm": 0.20937016606330872, "learning_rate": 4.4631901840490795e-05, "loss": 0.7942914366722107, "step": 583 }, { "epoch": 0.7165644171779141, "grad_norm": 0.18065683543682098, "learning_rate": 4.470858895705522e-05, "loss": 0.9468827247619629, "step": 584 }, { "epoch": 0.7177914110429447, "grad_norm": 0.1419723629951477, "learning_rate": 4.478527607361964e-05, "loss": 0.947795033454895, "step": 585 }, { "epoch": 0.7190184049079754, "grad_norm": 0.18902689218521118, "learning_rate": 4.4861963190184054e-05, "loss": 0.8803184032440186, "step": 586 }, { "epoch": 0.7202453987730061, "grad_norm": 0.1485278159379959, "learning_rate": 4.493865030674847e-05, "loss": 1.0116631984710693, "step": 587 }, { "epoch": 0.7214723926380369, "grad_norm": 0.1427728831768036, "learning_rate": 4.5015337423312885e-05, "loss": 1.0061039924621582, "step": 588 }, { "epoch": 0.7226993865030675, "grad_norm": 0.1559455692768097, "learning_rate": 4.5092024539877307e-05, "loss": 0.9325137138366699, "step": 589 }, { "epoch": 0.7239263803680982, "grad_norm": 0.16962383687496185, "learning_rate": 4.516871165644172e-05, "loss": 0.9016996026039124, "step": 590 }, { "epoch": 0.7251533742331289, "grad_norm": 0.14577080309391022, "learning_rate": 4.524539877300614e-05, "loss": 0.9985671043395996, "step": 591 }, { "epoch": 0.7263803680981595, "grad_norm": 0.19463953375816345, "learning_rate": 4.532208588957055e-05, "loss": 0.9090275764465332, "step": 592 }, { "epoch": 0.7276073619631902, "grad_norm": 0.16961133480072021, "learning_rate": 4.539877300613497e-05, "loss": 0.9626045227050781, "step": 593 }, { "epoch": 0.7288343558282209, "grad_norm": 0.18332833051681519, "learning_rate": 4.547546012269939e-05, "loss": 0.837238609790802, "step": 594 }, { "epoch": 0.7300613496932515, "grad_norm": 0.22512634098529816, "learning_rate": 4.5552147239263805e-05, "loss": 0.8066216111183167, "step": 595 }, { "epoch": 0.7312883435582822, "grad_norm": 0.1660270243883133, "learning_rate": 4.562883435582822e-05, "loss": 0.9943774938583374, "step": 596 }, { "epoch": 0.7325153374233129, "grad_norm": 0.15967997908592224, "learning_rate": 4.570552147239264e-05, "loss": 1.0277411937713623, "step": 597 }, { "epoch": 0.7337423312883435, "grad_norm": 0.17870105803012848, "learning_rate": 4.578220858895706e-05, "loss": 0.9351155757904053, "step": 598 }, { "epoch": 0.7349693251533742, "grad_norm": 0.3552117347717285, "learning_rate": 4.585889570552148e-05, "loss": 0.8667378425598145, "step": 599 }, { "epoch": 0.7361963190184049, "grad_norm": 0.17209061980247498, "learning_rate": 4.5935582822085894e-05, "loss": 0.9346461296081543, "step": 600 }, { "epoch": 0.7374233128834355, "grad_norm": 0.14244024455547333, "learning_rate": 4.601226993865031e-05, "loss": 0.8951839208602905, "step": 601 }, { "epoch": 0.7386503067484662, "grad_norm": 0.14845682680606842, "learning_rate": 4.6088957055214725e-05, "loss": 0.8537955284118652, "step": 602 }, { "epoch": 0.7398773006134969, "grad_norm": 0.1852504014968872, "learning_rate": 4.616564417177914e-05, "loss": 1.0414851903915405, "step": 603 }, { "epoch": 0.7411042944785277, "grad_norm": 0.17578375339508057, "learning_rate": 4.624233128834356e-05, "loss": 1.0510945320129395, "step": 604 }, { "epoch": 0.7423312883435583, "grad_norm": 0.21593140065670013, "learning_rate": 4.631901840490798e-05, "loss": 0.9380607604980469, "step": 605 }, { "epoch": 0.743558282208589, "grad_norm": 0.19006113708019257, "learning_rate": 4.639570552147239e-05, "loss": 0.911133348941803, "step": 606 }, { "epoch": 0.7447852760736197, "grad_norm": 0.17682643234729767, "learning_rate": 4.647239263803681e-05, "loss": 0.8262341022491455, "step": 607 }, { "epoch": 0.7460122699386503, "grad_norm": 0.16539201140403748, "learning_rate": 4.654907975460123e-05, "loss": 1.023884892463684, "step": 608 }, { "epoch": 0.747239263803681, "grad_norm": 0.16158737242221832, "learning_rate": 4.6625766871165645e-05, "loss": 0.884002685546875, "step": 609 }, { "epoch": 0.7484662576687117, "grad_norm": 0.1605844348669052, "learning_rate": 4.670245398773007e-05, "loss": 0.9956957101821899, "step": 610 }, { "epoch": 0.7496932515337423, "grad_norm": 0.1617811769247055, "learning_rate": 4.677914110429448e-05, "loss": 0.9355655908584595, "step": 611 }, { "epoch": 0.750920245398773, "grad_norm": 0.19090676307678223, "learning_rate": 4.68558282208589e-05, "loss": 0.91180819272995, "step": 612 }, { "epoch": 0.7521472392638037, "grad_norm": 0.19298182427883148, "learning_rate": 4.693251533742332e-05, "loss": 0.9095517992973328, "step": 613 }, { "epoch": 0.7533742331288343, "grad_norm": 0.18086522817611694, "learning_rate": 4.7009202453987734e-05, "loss": 0.9931987524032593, "step": 614 }, { "epoch": 0.754601226993865, "grad_norm": 0.1705838292837143, "learning_rate": 4.708588957055215e-05, "loss": 0.902172327041626, "step": 615 }, { "epoch": 0.7558282208588957, "grad_norm": 0.1732853651046753, "learning_rate": 4.7162576687116565e-05, "loss": 0.9617128968238831, "step": 616 }, { "epoch": 0.7570552147239263, "grad_norm": 0.1905973255634308, "learning_rate": 4.723926380368098e-05, "loss": 0.8427364826202393, "step": 617 }, { "epoch": 0.758282208588957, "grad_norm": 0.20606712996959686, "learning_rate": 4.73159509202454e-05, "loss": 0.9220590591430664, "step": 618 }, { "epoch": 0.7595092024539877, "grad_norm": 0.20174963772296906, "learning_rate": 4.739263803680982e-05, "loss": 1.0543793439865112, "step": 619 }, { "epoch": 0.7607361963190185, "grad_norm": 0.19647692143917084, "learning_rate": 4.746932515337423e-05, "loss": 0.8513867855072021, "step": 620 }, { "epoch": 0.7619631901840491, "grad_norm": 0.19531606137752533, "learning_rate": 4.754601226993865e-05, "loss": 0.8831640481948853, "step": 621 }, { "epoch": 0.7631901840490798, "grad_norm": 0.2009778916835785, "learning_rate": 4.762269938650307e-05, "loss": 0.9221029281616211, "step": 622 }, { "epoch": 0.7644171779141105, "grad_norm": 0.1797734797000885, "learning_rate": 4.769938650306749e-05, "loss": 0.9094139933586121, "step": 623 }, { "epoch": 0.7656441717791411, "grad_norm": 0.20489978790283203, "learning_rate": 4.777607361963191e-05, "loss": 0.9617691040039062, "step": 624 }, { "epoch": 0.7668711656441718, "grad_norm": 0.19291305541992188, "learning_rate": 4.785276073619632e-05, "loss": 0.8863768577575684, "step": 625 }, { "epoch": 0.7680981595092025, "grad_norm": 0.18293823301792145, "learning_rate": 4.792944785276074e-05, "loss": 0.9635242223739624, "step": 626 }, { "epoch": 0.7693251533742331, "grad_norm": 0.178012877702713, "learning_rate": 4.800613496932516e-05, "loss": 0.9163222312927246, "step": 627 }, { "epoch": 0.7705521472392638, "grad_norm": 0.14388804137706757, "learning_rate": 4.8082822085889575e-05, "loss": 1.0447555780410767, "step": 628 }, { "epoch": 0.7717791411042945, "grad_norm": 0.1709805279970169, "learning_rate": 4.815950920245399e-05, "loss": 1.0518419742584229, "step": 629 }, { "epoch": 0.7730061349693251, "grad_norm": 0.18977124989032745, "learning_rate": 4.8236196319018405e-05, "loss": 1.0235449075698853, "step": 630 }, { "epoch": 0.7742331288343558, "grad_norm": 0.24154452979564667, "learning_rate": 4.831288343558282e-05, "loss": 1.075380563735962, "step": 631 }, { "epoch": 0.7754601226993865, "grad_norm": 0.21992218494415283, "learning_rate": 4.838957055214724e-05, "loss": 0.9333031177520752, "step": 632 }, { "epoch": 0.7766871165644171, "grad_norm": 0.22585882246494293, "learning_rate": 4.846625766871166e-05, "loss": 0.8081429600715637, "step": 633 }, { "epoch": 0.7779141104294478, "grad_norm": 0.1660272777080536, "learning_rate": 4.854294478527607e-05, "loss": 0.9542590975761414, "step": 634 }, { "epoch": 0.7791411042944786, "grad_norm": 0.24937203526496887, "learning_rate": 4.8619631901840495e-05, "loss": 1.101457118988037, "step": 635 }, { "epoch": 0.7803680981595092, "grad_norm": 0.17112675309181213, "learning_rate": 4.869631901840491e-05, "loss": 0.8484828472137451, "step": 636 }, { "epoch": 0.7815950920245399, "grad_norm": 0.1914556622505188, "learning_rate": 4.877300613496933e-05, "loss": 0.9181028604507446, "step": 637 }, { "epoch": 0.7828220858895706, "grad_norm": 0.17863428592681885, "learning_rate": 4.884969325153375e-05, "loss": 0.9645162224769592, "step": 638 }, { "epoch": 0.7840490797546013, "grad_norm": 0.1837553232908249, "learning_rate": 4.892638036809816e-05, "loss": 0.9475194215774536, "step": 639 }, { "epoch": 0.7852760736196319, "grad_norm": 0.2036539614200592, "learning_rate": 4.900306748466258e-05, "loss": 0.911103367805481, "step": 640 }, { "epoch": 0.7865030674846626, "grad_norm": 0.16630731523036957, "learning_rate": 4.907975460122699e-05, "loss": 0.9762678146362305, "step": 641 }, { "epoch": 0.7877300613496933, "grad_norm": 0.19265447556972504, "learning_rate": 4.9156441717791415e-05, "loss": 1.0209988355636597, "step": 642 }, { "epoch": 0.7889570552147239, "grad_norm": 0.17337006330490112, "learning_rate": 4.923312883435583e-05, "loss": 1.037985920906067, "step": 643 }, { "epoch": 0.7901840490797546, "grad_norm": 0.1954217553138733, "learning_rate": 4.9309815950920245e-05, "loss": 0.8563196063041687, "step": 644 }, { "epoch": 0.7914110429447853, "grad_norm": 0.17619307339191437, "learning_rate": 4.938650306748466e-05, "loss": 0.9931352734565735, "step": 645 }, { "epoch": 0.7926380368098159, "grad_norm": 0.21911044418811798, "learning_rate": 4.946319018404908e-05, "loss": 1.0464510917663574, "step": 646 }, { "epoch": 0.7938650306748466, "grad_norm": 0.21951153874397278, "learning_rate": 4.9539877300613504e-05, "loss": 0.8448630571365356, "step": 647 }, { "epoch": 0.7950920245398773, "grad_norm": 0.17678672075271606, "learning_rate": 4.961656441717792e-05, "loss": 0.9637424945831299, "step": 648 }, { "epoch": 0.7963190184049079, "grad_norm": 0.14790405333042145, "learning_rate": 4.9693251533742335e-05, "loss": 0.9521687030792236, "step": 649 }, { "epoch": 0.7975460122699386, "grad_norm": 0.26001328229904175, "learning_rate": 4.976993865030675e-05, "loss": 0.9722321629524231, "step": 650 }, { "epoch": 0.7987730061349694, "grad_norm": 0.20337051153182983, "learning_rate": 4.984662576687117e-05, "loss": 0.9081771373748779, "step": 651 }, { "epoch": 0.8, "grad_norm": 0.1871991902589798, "learning_rate": 4.992331288343559e-05, "loss": 0.918889045715332, "step": 652 }, { "epoch": 0.8012269938650307, "grad_norm": 0.1606166660785675, "learning_rate": 5e-05, "loss": 0.9198272228240967, "step": 653 }, { "epoch": 0.8024539877300614, "grad_norm": 0.17006827890872955, "learning_rate": 4.9999999496160655e-05, "loss": 0.9726353287696838, "step": 654 }, { "epoch": 0.803680981595092, "grad_norm": 0.1748315393924713, "learning_rate": 4.9999997984642625e-05, "loss": 0.9158386588096619, "step": 655 }, { "epoch": 0.8049079754601227, "grad_norm": 0.2147388756275177, "learning_rate": 4.999999546544598e-05, "loss": 0.9713004231452942, "step": 656 }, { "epoch": 0.8061349693251534, "grad_norm": 0.22589299082756042, "learning_rate": 4.999999193857082e-05, "loss": 0.8359630107879639, "step": 657 }, { "epoch": 0.807361963190184, "grad_norm": 0.18464118242263794, "learning_rate": 4.9999987404017294e-05, "loss": 0.7202401161193848, "step": 658 }, { "epoch": 0.8085889570552147, "grad_norm": 0.1551416665315628, "learning_rate": 4.9999981861785575e-05, "loss": 0.829357385635376, "step": 659 }, { "epoch": 0.8098159509202454, "grad_norm": 0.17344027757644653, "learning_rate": 4.999997531187589e-05, "loss": 0.8804149031639099, "step": 660 }, { "epoch": 0.811042944785276, "grad_norm": 0.19908201694488525, "learning_rate": 4.99999677542885e-05, "loss": 0.9639955759048462, "step": 661 }, { "epoch": 0.8122699386503067, "grad_norm": 0.1594792753458023, "learning_rate": 4.999995918902371e-05, "loss": 1.011673092842102, "step": 662 }, { "epoch": 0.8134969325153374, "grad_norm": 0.15850147604942322, "learning_rate": 4.999994961608186e-05, "loss": 1.0678529739379883, "step": 663 }, { "epoch": 0.8147239263803681, "grad_norm": 0.17994078993797302, "learning_rate": 4.999993903546336e-05, "loss": 1.0356392860412598, "step": 664 }, { "epoch": 0.8159509202453987, "grad_norm": 0.21009568870067596, "learning_rate": 4.999992744716862e-05, "loss": 0.9745035171508789, "step": 665 }, { "epoch": 0.8171779141104294, "grad_norm": 0.1838737577199936, "learning_rate": 4.999991485119809e-05, "loss": 0.8974696397781372, "step": 666 }, { "epoch": 0.8184049079754602, "grad_norm": 0.21063841879367828, "learning_rate": 4.99999012475523e-05, "loss": 0.9549497365951538, "step": 667 }, { "epoch": 0.8196319018404908, "grad_norm": 0.1892765611410141, "learning_rate": 4.9999886636231794e-05, "loss": 0.9214696884155273, "step": 668 }, { "epoch": 0.8208588957055215, "grad_norm": 0.18703222274780273, "learning_rate": 4.9999871017237164e-05, "loss": 0.8872675895690918, "step": 669 }, { "epoch": 0.8220858895705522, "grad_norm": 0.18636217713356018, "learning_rate": 4.9999854390569036e-05, "loss": 0.8719230890274048, "step": 670 }, { "epoch": 0.8233128834355828, "grad_norm": 0.20231357216835022, "learning_rate": 4.999983675622807e-05, "loss": 1.0997915267944336, "step": 671 }, { "epoch": 0.8245398773006135, "grad_norm": 0.18483470380306244, "learning_rate": 4.999981811421499e-05, "loss": 1.0899817943572998, "step": 672 }, { "epoch": 0.8257668711656442, "grad_norm": 0.14923721551895142, "learning_rate": 4.999979846453055e-05, "loss": 1.0715575218200684, "step": 673 }, { "epoch": 0.8269938650306748, "grad_norm": 0.19795477390289307, "learning_rate": 4.9999777807175526e-05, "loss": 0.9783056974411011, "step": 674 }, { "epoch": 0.8282208588957055, "grad_norm": 0.1800733208656311, "learning_rate": 4.9999756142150765e-05, "loss": 1.0455032587051392, "step": 675 }, { "epoch": 0.8294478527607362, "grad_norm": 0.1986493170261383, "learning_rate": 4.999973346945714e-05, "loss": 0.9802632331848145, "step": 676 }, { "epoch": 0.8306748466257668, "grad_norm": 0.17542126774787903, "learning_rate": 4.9999709789095555e-05, "loss": 0.9315134286880493, "step": 677 }, { "epoch": 0.8319018404907975, "grad_norm": 0.17153581976890564, "learning_rate": 4.9999685101066976e-05, "loss": 0.8825066089630127, "step": 678 }, { "epoch": 0.8331288343558282, "grad_norm": 0.1910146027803421, "learning_rate": 4.999965940537238e-05, "loss": 0.9072350859642029, "step": 679 }, { "epoch": 0.8343558282208589, "grad_norm": 0.19643308222293854, "learning_rate": 4.9999632702012825e-05, "loss": 1.0673106908798218, "step": 680 }, { "epoch": 0.8355828220858895, "grad_norm": 0.19155722856521606, "learning_rate": 4.999960499098937e-05, "loss": 0.927787184715271, "step": 681 }, { "epoch": 0.8368098159509203, "grad_norm": 0.18321090936660767, "learning_rate": 4.999957627230314e-05, "loss": 0.9952216744422913, "step": 682 }, { "epoch": 0.838036809815951, "grad_norm": 0.1645468771457672, "learning_rate": 4.99995465459553e-05, "loss": 0.9894980192184448, "step": 683 }, { "epoch": 0.8392638036809816, "grad_norm": 0.1839318871498108, "learning_rate": 4.999951581194703e-05, "loss": 0.8606082201004028, "step": 684 }, { "epoch": 0.8404907975460123, "grad_norm": 0.21668677031993866, "learning_rate": 4.999948407027958e-05, "loss": 0.8108295202255249, "step": 685 }, { "epoch": 0.841717791411043, "grad_norm": 0.17767739295959473, "learning_rate": 4.999945132095423e-05, "loss": 0.984119176864624, "step": 686 }, { "epoch": 0.8429447852760736, "grad_norm": 0.1917819380760193, "learning_rate": 4.99994175639723e-05, "loss": 0.9587100744247437, "step": 687 }, { "epoch": 0.8441717791411043, "grad_norm": 0.1878134310245514, "learning_rate": 4.999938279933514e-05, "loss": 0.8837506771087646, "step": 688 }, { "epoch": 0.845398773006135, "grad_norm": 0.17362691462039948, "learning_rate": 4.9999347027044166e-05, "loss": 1.0654271841049194, "step": 689 }, { "epoch": 0.8466257668711656, "grad_norm": 0.20600073039531708, "learning_rate": 4.999931024710082e-05, "loss": 0.9740208387374878, "step": 690 }, { "epoch": 0.8478527607361963, "grad_norm": 0.15837189555168152, "learning_rate": 4.999927245950656e-05, "loss": 0.938179612159729, "step": 691 }, { "epoch": 0.849079754601227, "grad_norm": 0.15627826750278473, "learning_rate": 4.999923366426295e-05, "loss": 0.9180142879486084, "step": 692 }, { "epoch": 0.8503067484662576, "grad_norm": 0.18184112012386322, "learning_rate": 4.9999193861371516e-05, "loss": 0.7489123344421387, "step": 693 }, { "epoch": 0.8515337423312883, "grad_norm": 0.16103146970272064, "learning_rate": 4.999915305083388e-05, "loss": 0.983184814453125, "step": 694 }, { "epoch": 0.852760736196319, "grad_norm": 0.1987670511007309, "learning_rate": 4.9999111232651684e-05, "loss": 0.8986855149269104, "step": 695 }, { "epoch": 0.8539877300613496, "grad_norm": 0.17597201466560364, "learning_rate": 4.999906840682662e-05, "loss": 1.0236848592758179, "step": 696 }, { "epoch": 0.8552147239263803, "grad_norm": 0.2011665403842926, "learning_rate": 4.9999024573360396e-05, "loss": 0.9104803204536438, "step": 697 }, { "epoch": 0.8564417177914111, "grad_norm": 0.23870515823364258, "learning_rate": 4.99989797322548e-05, "loss": 0.7954229116439819, "step": 698 }, { "epoch": 0.8576687116564418, "grad_norm": 0.2057446390390396, "learning_rate": 4.999893388351163e-05, "loss": 0.8676260113716125, "step": 699 }, { "epoch": 0.8588957055214724, "grad_norm": 0.18938805162906647, "learning_rate": 4.999888702713273e-05, "loss": 0.9894938468933105, "step": 700 }, { "epoch": 0.8601226993865031, "grad_norm": 0.15922167897224426, "learning_rate": 4.9998839163120006e-05, "loss": 1.0513750314712524, "step": 701 }, { "epoch": 0.8613496932515338, "grad_norm": 0.21217896044254303, "learning_rate": 4.999879029147536e-05, "loss": 0.9489930272102356, "step": 702 }, { "epoch": 0.8625766871165644, "grad_norm": 0.1854456216096878, "learning_rate": 4.999874041220078e-05, "loss": 0.931467592716217, "step": 703 }, { "epoch": 0.8638036809815951, "grad_norm": 0.17969129979610443, "learning_rate": 4.999868952529827e-05, "loss": 1.0578186511993408, "step": 704 }, { "epoch": 0.8650306748466258, "grad_norm": 0.20083822309970856, "learning_rate": 4.999863763076989e-05, "loss": 0.9868844151496887, "step": 705 }, { "epoch": 0.8662576687116564, "grad_norm": 0.20575697720050812, "learning_rate": 4.999858472861772e-05, "loss": 1.007995367050171, "step": 706 }, { "epoch": 0.8674846625766871, "grad_norm": 0.1863430142402649, "learning_rate": 4.99985308188439e-05, "loss": 1.0176594257354736, "step": 707 }, { "epoch": 0.8687116564417178, "grad_norm": 0.15815453231334686, "learning_rate": 4.9998475901450603e-05, "loss": 1.0381344556808472, "step": 708 }, { "epoch": 0.8699386503067484, "grad_norm": 0.13374017179012299, "learning_rate": 4.9998419976440036e-05, "loss": 0.9388279914855957, "step": 709 }, { "epoch": 0.8711656441717791, "grad_norm": 0.1851647049188614, "learning_rate": 4.9998363043814464e-05, "loss": 0.8005150556564331, "step": 710 }, { "epoch": 0.8723926380368098, "grad_norm": 0.15230295062065125, "learning_rate": 4.999830510357617e-05, "loss": 1.0193041563034058, "step": 711 }, { "epoch": 0.8736196319018404, "grad_norm": 0.20294666290283203, "learning_rate": 4.999824615572749e-05, "loss": 0.8765863180160522, "step": 712 }, { "epoch": 0.8748466257668711, "grad_norm": 0.17347167432308197, "learning_rate": 4.999818620027082e-05, "loss": 0.808927059173584, "step": 713 }, { "epoch": 0.8760736196319019, "grad_norm": 0.16360050439834595, "learning_rate": 4.999812523720855e-05, "loss": 1.0378289222717285, "step": 714 }, { "epoch": 0.8773006134969326, "grad_norm": 0.15339195728302002, "learning_rate": 4.999806326654315e-05, "loss": 1.1148386001586914, "step": 715 }, { "epoch": 0.8785276073619632, "grad_norm": 0.18984249234199524, "learning_rate": 4.999800028827711e-05, "loss": 1.0074903964996338, "step": 716 }, { "epoch": 0.8797546012269939, "grad_norm": 0.20458391308784485, "learning_rate": 4.9997936302412985e-05, "loss": 0.949413537979126, "step": 717 }, { "epoch": 0.8809815950920246, "grad_norm": 0.1767282634973526, "learning_rate": 4.9997871308953335e-05, "loss": 0.8916364908218384, "step": 718 }, { "epoch": 0.8822085889570552, "grad_norm": 0.17117427289485931, "learning_rate": 4.9997805307900804e-05, "loss": 1.1054184436798096, "step": 719 }, { "epoch": 0.8834355828220859, "grad_norm": 0.14453670382499695, "learning_rate": 4.999773829925802e-05, "loss": 1.0341283082962036, "step": 720 }, { "epoch": 0.8846625766871166, "grad_norm": 0.13492147624492645, "learning_rate": 4.9997670283027706e-05, "loss": 0.9615033268928528, "step": 721 }, { "epoch": 0.8858895705521472, "grad_norm": 0.1712752729654312, "learning_rate": 4.99976012592126e-05, "loss": 1.103288173675537, "step": 722 }, { "epoch": 0.8871165644171779, "grad_norm": 0.21402187645435333, "learning_rate": 4.999753122781548e-05, "loss": 0.9678021669387817, "step": 723 }, { "epoch": 0.8883435582822086, "grad_norm": 0.20895323157310486, "learning_rate": 4.9997460188839186e-05, "loss": 1.050478458404541, "step": 724 }, { "epoch": 0.8895705521472392, "grad_norm": 0.14691133797168732, "learning_rate": 4.999738814228655e-05, "loss": 0.9723455309867859, "step": 725 }, { "epoch": 0.8907975460122699, "grad_norm": 0.21280796825885773, "learning_rate": 4.99973150881605e-05, "loss": 0.9357302188873291, "step": 726 }, { "epoch": 0.8920245398773006, "grad_norm": 0.18227450549602509, "learning_rate": 4.999724102646397e-05, "loss": 0.8263777494430542, "step": 727 }, { "epoch": 0.8932515337423312, "grad_norm": 0.20292320847511292, "learning_rate": 4.999716595719996e-05, "loss": 0.8009226322174072, "step": 728 }, { "epoch": 0.894478527607362, "grad_norm": 0.1754937767982483, "learning_rate": 4.9997089880371465e-05, "loss": 0.8841652870178223, "step": 729 }, { "epoch": 0.8957055214723927, "grad_norm": 0.189581498503685, "learning_rate": 4.999701279598158e-05, "loss": 0.9443190097808838, "step": 730 }, { "epoch": 0.8969325153374234, "grad_norm": 0.1626397669315338, "learning_rate": 4.9996934704033405e-05, "loss": 1.0493483543395996, "step": 731 }, { "epoch": 0.898159509202454, "grad_norm": 0.18093985319137573, "learning_rate": 4.999685560453008e-05, "loss": 1.0749177932739258, "step": 732 }, { "epoch": 0.8993865030674847, "grad_norm": 0.22207815945148468, "learning_rate": 4.999677549747481e-05, "loss": 0.8506104946136475, "step": 733 }, { "epoch": 0.9006134969325154, "grad_norm": 0.28364625573158264, "learning_rate": 4.9996694382870804e-05, "loss": 0.9617756605148315, "step": 734 }, { "epoch": 0.901840490797546, "grad_norm": 0.19717936217784882, "learning_rate": 4.9996612260721343e-05, "loss": 0.8136574029922485, "step": 735 }, { "epoch": 0.9030674846625767, "grad_norm": 0.17888078093528748, "learning_rate": 4.999652913102973e-05, "loss": 0.9387364387512207, "step": 736 }, { "epoch": 0.9042944785276074, "grad_norm": 0.18409819900989532, "learning_rate": 4.9996444993799323e-05, "loss": 0.9015089273452759, "step": 737 }, { "epoch": 0.905521472392638, "grad_norm": 0.18948233127593994, "learning_rate": 4.999635984903351e-05, "loss": 0.9533339738845825, "step": 738 }, { "epoch": 0.9067484662576687, "grad_norm": 0.20744499564170837, "learning_rate": 4.9996273696735727e-05, "loss": 0.8442941308021545, "step": 739 }, { "epoch": 0.9079754601226994, "grad_norm": 0.1678057610988617, "learning_rate": 4.999618653690944e-05, "loss": 0.9215878844261169, "step": 740 }, { "epoch": 0.90920245398773, "grad_norm": 0.17822985351085663, "learning_rate": 4.9996098369558166e-05, "loss": 1.0777983665466309, "step": 741 }, { "epoch": 0.9104294478527607, "grad_norm": 0.18061842024326324, "learning_rate": 4.999600919468545e-05, "loss": 1.1714255809783936, "step": 742 }, { "epoch": 0.9116564417177914, "grad_norm": 0.19064326584339142, "learning_rate": 4.99959190122949e-05, "loss": 0.9962393641471863, "step": 743 }, { "epoch": 0.912883435582822, "grad_norm": 0.183218851685524, "learning_rate": 4.999582782239015e-05, "loss": 0.9951282739639282, "step": 744 }, { "epoch": 0.9141104294478528, "grad_norm": 0.16018925607204437, "learning_rate": 4.9995735624974867e-05, "loss": 0.8326078653335571, "step": 745 }, { "epoch": 0.9153374233128835, "grad_norm": 0.17338116466999054, "learning_rate": 4.999564242005277e-05, "loss": 1.0311954021453857, "step": 746 }, { "epoch": 0.9165644171779141, "grad_norm": 0.2216803878545761, "learning_rate": 4.999554820762762e-05, "loss": 0.7673021554946899, "step": 747 }, { "epoch": 0.9177914110429448, "grad_norm": 0.20231163501739502, "learning_rate": 4.999545298770321e-05, "loss": 0.9003833532333374, "step": 748 }, { "epoch": 0.9190184049079755, "grad_norm": 0.20660831034183502, "learning_rate": 4.9995356760283376e-05, "loss": 1.1056897640228271, "step": 749 }, { "epoch": 0.9202453987730062, "grad_norm": 0.1975238025188446, "learning_rate": 4.9995259525372e-05, "loss": 0.9392615556716919, "step": 750 }, { "epoch": 0.9214723926380368, "grad_norm": 0.1773754507303238, "learning_rate": 4.9995161282973005e-05, "loss": 0.9009476900100708, "step": 751 }, { "epoch": 0.9226993865030675, "grad_norm": 0.15030574798583984, "learning_rate": 4.9995062033090346e-05, "loss": 0.961235523223877, "step": 752 }, { "epoch": 0.9239263803680982, "grad_norm": 0.17725662887096405, "learning_rate": 4.9994961775728025e-05, "loss": 0.842170000076294, "step": 753 }, { "epoch": 0.9251533742331288, "grad_norm": 0.33588743209838867, "learning_rate": 4.999486051089008e-05, "loss": 0.9130297899246216, "step": 754 }, { "epoch": 0.9263803680981595, "grad_norm": 0.19678527116775513, "learning_rate": 4.9994758238580595e-05, "loss": 0.9977635145187378, "step": 755 }, { "epoch": 0.9276073619631902, "grad_norm": 0.1783633530139923, "learning_rate": 4.99946549588037e-05, "loss": 0.9983693361282349, "step": 756 }, { "epoch": 0.9288343558282208, "grad_norm": 0.17483985424041748, "learning_rate": 4.999455067156355e-05, "loss": 0.9683924913406372, "step": 757 }, { "epoch": 0.9300613496932515, "grad_norm": 0.173611581325531, "learning_rate": 4.999444537686434e-05, "loss": 0.9801812171936035, "step": 758 }, { "epoch": 0.9312883435582822, "grad_norm": 0.18866002559661865, "learning_rate": 4.999433907471033e-05, "loss": 0.8385591506958008, "step": 759 }, { "epoch": 0.9325153374233128, "grad_norm": 0.21186470985412598, "learning_rate": 4.9994231765105794e-05, "loss": 1.0104557275772095, "step": 760 }, { "epoch": 0.9337423312883436, "grad_norm": 0.14682714641094208, "learning_rate": 4.9994123448055065e-05, "loss": 1.038000464439392, "step": 761 }, { "epoch": 0.9349693251533743, "grad_norm": 0.15596286952495575, "learning_rate": 4.99940141235625e-05, "loss": 0.9450811743736267, "step": 762 }, { "epoch": 0.9361963190184049, "grad_norm": 0.19662868976593018, "learning_rate": 4.999390379163251e-05, "loss": 0.9996469020843506, "step": 763 }, { "epoch": 0.9374233128834356, "grad_norm": 0.16038548946380615, "learning_rate": 4.999379245226955e-05, "loss": 0.9883758425712585, "step": 764 }, { "epoch": 0.9386503067484663, "grad_norm": 0.19133596122264862, "learning_rate": 4.99936801054781e-05, "loss": 0.8991961479187012, "step": 765 }, { "epoch": 0.939877300613497, "grad_norm": 0.20299014449119568, "learning_rate": 4.999356675126269e-05, "loss": 0.9183839559555054, "step": 766 }, { "epoch": 0.9411042944785276, "grad_norm": 0.22244712710380554, "learning_rate": 4.999345238962788e-05, "loss": 0.8986209630966187, "step": 767 }, { "epoch": 0.9423312883435583, "grad_norm": 0.1528477668762207, "learning_rate": 4.9993337020578295e-05, "loss": 0.8507009744644165, "step": 768 }, { "epoch": 0.943558282208589, "grad_norm": 0.17503724992275238, "learning_rate": 4.999322064411858e-05, "loss": 1.0110118389129639, "step": 769 }, { "epoch": 0.9447852760736196, "grad_norm": 0.2106586992740631, "learning_rate": 4.9993103260253417e-05, "loss": 1.0600779056549072, "step": 770 }, { "epoch": 0.9460122699386503, "grad_norm": 0.1803390085697174, "learning_rate": 4.999298486898755e-05, "loss": 0.8635019063949585, "step": 771 }, { "epoch": 0.947239263803681, "grad_norm": 0.16886591911315918, "learning_rate": 4.999286547032574e-05, "loss": 0.983735203742981, "step": 772 }, { "epoch": 0.9484662576687116, "grad_norm": 0.17368155717849731, "learning_rate": 4.999274506427281e-05, "loss": 0.8585123419761658, "step": 773 }, { "epoch": 0.9496932515337423, "grad_norm": 0.1765257716178894, "learning_rate": 4.9992623650833604e-05, "loss": 0.9488832950592041, "step": 774 }, { "epoch": 0.950920245398773, "grad_norm": 0.1633220613002777, "learning_rate": 4.999250123001302e-05, "loss": 1.0146827697753906, "step": 775 }, { "epoch": 0.9521472392638037, "grad_norm": 0.16535843908786774, "learning_rate": 4.9992377801815994e-05, "loss": 0.9469256401062012, "step": 776 }, { "epoch": 0.9533742331288344, "grad_norm": 0.22510869801044464, "learning_rate": 4.9992253366247496e-05, "loss": 0.851465106010437, "step": 777 }, { "epoch": 0.9546012269938651, "grad_norm": 0.16892805695533752, "learning_rate": 4.9992127923312546e-05, "loss": 0.9254555702209473, "step": 778 }, { "epoch": 0.9558282208588957, "grad_norm": 0.17140288650989532, "learning_rate": 4.9992001473016206e-05, "loss": 0.9841599464416504, "step": 779 }, { "epoch": 0.9570552147239264, "grad_norm": 0.15835928916931152, "learning_rate": 4.999187401536355e-05, "loss": 0.8381795287132263, "step": 780 }, { "epoch": 0.9582822085889571, "grad_norm": 0.18357157707214355, "learning_rate": 4.9991745550359746e-05, "loss": 0.9051885604858398, "step": 781 }, { "epoch": 0.9595092024539877, "grad_norm": 0.16874738037586212, "learning_rate": 4.9991616078009954e-05, "loss": 0.95485520362854, "step": 782 }, { "epoch": 0.9607361963190184, "grad_norm": 0.1565741002559662, "learning_rate": 4.99914855983194e-05, "loss": 0.9834861755371094, "step": 783 }, { "epoch": 0.9619631901840491, "grad_norm": 0.15838754177093506, "learning_rate": 4.999135411129333e-05, "loss": 0.982602596282959, "step": 784 }, { "epoch": 0.9631901840490797, "grad_norm": 0.16057267785072327, "learning_rate": 4.999122161693706e-05, "loss": 1.1054691076278687, "step": 785 }, { "epoch": 0.9644171779141104, "grad_norm": 0.17139388620853424, "learning_rate": 4.9991088115255926e-05, "loss": 1.1416425704956055, "step": 786 }, { "epoch": 0.9656441717791411, "grad_norm": 0.15918061137199402, "learning_rate": 4.9990953606255295e-05, "loss": 1.0820900201797485, "step": 787 }, { "epoch": 0.9668711656441717, "grad_norm": 0.1861863136291504, "learning_rate": 4.999081808994062e-05, "loss": 1.0486385822296143, "step": 788 }, { "epoch": 0.9680981595092024, "grad_norm": 0.16572053730487823, "learning_rate": 4.999068156631733e-05, "loss": 0.9382085800170898, "step": 789 }, { "epoch": 0.9693251533742331, "grad_norm": 0.21080617606639862, "learning_rate": 4.999054403539094e-05, "loss": 0.8533282279968262, "step": 790 }, { "epoch": 0.9705521472392638, "grad_norm": 0.1682722568511963, "learning_rate": 4.9990405497167e-05, "loss": 0.9247971773147583, "step": 791 }, { "epoch": 0.9717791411042945, "grad_norm": 0.14958171546459198, "learning_rate": 4.999026595165109e-05, "loss": 1.0699021816253662, "step": 792 }, { "epoch": 0.9730061349693252, "grad_norm": 0.18665067851543427, "learning_rate": 4.999012539884883e-05, "loss": 0.8970293998718262, "step": 793 }, { "epoch": 0.9742331288343559, "grad_norm": 0.1996026486158371, "learning_rate": 4.99899838387659e-05, "loss": 0.9314415454864502, "step": 794 }, { "epoch": 0.9754601226993865, "grad_norm": 0.15595246851444244, "learning_rate": 4.998984127140798e-05, "loss": 0.9782549142837524, "step": 795 }, { "epoch": 0.9766871165644172, "grad_norm": 0.1842477023601532, "learning_rate": 4.9989697696780834e-05, "loss": 0.8942534923553467, "step": 796 }, { "epoch": 0.9779141104294479, "grad_norm": 0.17138126492500305, "learning_rate": 4.998955311489025e-05, "loss": 0.7598989009857178, "step": 797 }, { "epoch": 0.9791411042944785, "grad_norm": 0.16535896062850952, "learning_rate": 4.998940752574206e-05, "loss": 1.0196363925933838, "step": 798 }, { "epoch": 0.9803680981595092, "grad_norm": 0.16092221438884735, "learning_rate": 4.998926092934212e-05, "loss": 1.0040117502212524, "step": 799 }, { "epoch": 0.9815950920245399, "grad_norm": 0.20901581645011902, "learning_rate": 4.998911332569634e-05, "loss": 0.9634795784950256, "step": 800 }, { "epoch": 0.9828220858895705, "grad_norm": 0.14939260482788086, "learning_rate": 4.998896471481068e-05, "loss": 1.0278257131576538, "step": 801 }, { "epoch": 0.9840490797546012, "grad_norm": 0.16489604115486145, "learning_rate": 4.998881509669111e-05, "loss": 1.138378620147705, "step": 802 }, { "epoch": 0.9852760736196319, "grad_norm": 0.1707465499639511, "learning_rate": 4.998866447134368e-05, "loss": 0.9068062901496887, "step": 803 }, { "epoch": 0.9865030674846625, "grad_norm": 0.19762417674064636, "learning_rate": 4.998851283877446e-05, "loss": 0.829114556312561, "step": 804 }, { "epoch": 0.9877300613496932, "grad_norm": 0.17803099751472473, "learning_rate": 4.998836019898956e-05, "loss": 0.9514222145080566, "step": 805 }, { "epoch": 0.9889570552147239, "grad_norm": 0.1772756427526474, "learning_rate": 4.998820655199512e-05, "loss": 0.8862832188606262, "step": 806 }, { "epoch": 0.9901840490797545, "grad_norm": 0.19499516487121582, "learning_rate": 4.998805189779735e-05, "loss": 0.8982877731323242, "step": 807 }, { "epoch": 0.9914110429447853, "grad_norm": 0.16761308908462524, "learning_rate": 4.998789623640246e-05, "loss": 0.9022541046142578, "step": 808 }, { "epoch": 0.992638036809816, "grad_norm": 0.19328272342681885, "learning_rate": 4.998773956781676e-05, "loss": 0.8655497431755066, "step": 809 }, { "epoch": 0.9938650306748467, "grad_norm": 0.186772882938385, "learning_rate": 4.9987581892046545e-05, "loss": 0.9214582443237305, "step": 810 }, { "epoch": 0.9950920245398773, "grad_norm": 0.2739534080028534, "learning_rate": 4.998742320909816e-05, "loss": 1.0490964651107788, "step": 811 }, { "epoch": 0.996319018404908, "grad_norm": 0.18046629428863525, "learning_rate": 4.998726351897801e-05, "loss": 0.8628851175308228, "step": 812 }, { "epoch": 0.9975460122699387, "grad_norm": 0.15601086616516113, "learning_rate": 4.998710282169255e-05, "loss": 0.9182218313217163, "step": 813 }, { "epoch": 0.9987730061349693, "grad_norm": 0.17621605098247528, "learning_rate": 4.998694111724822e-05, "loss": 0.9243752956390381, "step": 814 }, { "epoch": 1.0, "grad_norm": 0.17228829860687256, "learning_rate": 4.9986778405651567e-05, "loss": 0.8683056831359863, "step": 815 }, { "epoch": 1.0012269938650307, "grad_norm": 0.1976250410079956, "learning_rate": 4.9986614686909146e-05, "loss": 0.8880816698074341, "step": 816 }, { "epoch": 1.0024539877300613, "grad_norm": 0.16485337913036346, "learning_rate": 4.998644996102755e-05, "loss": 0.9071677923202515, "step": 817 }, { "epoch": 1.003680981595092, "grad_norm": 0.18546472489833832, "learning_rate": 4.998628422801341e-05, "loss": 0.8912653923034668, "step": 818 }, { "epoch": 1.0049079754601227, "grad_norm": 0.1922680139541626, "learning_rate": 4.9986117487873426e-05, "loss": 0.8786331415176392, "step": 819 }, { "epoch": 1.0061349693251533, "grad_norm": 0.17929980158805847, "learning_rate": 4.99859497406143e-05, "loss": 0.946155309677124, "step": 820 }, { "epoch": 1.007361963190184, "grad_norm": 0.17702297866344452, "learning_rate": 4.9985780986242816e-05, "loss": 0.8424768447875977, "step": 821 }, { "epoch": 1.0085889570552147, "grad_norm": 0.1808130145072937, "learning_rate": 4.998561122476575e-05, "loss": 0.8914746046066284, "step": 822 }, { "epoch": 1.0098159509202453, "grad_norm": 0.13399691879749298, "learning_rate": 4.998544045618996e-05, "loss": 0.9735754728317261, "step": 823 }, { "epoch": 1.011042944785276, "grad_norm": 0.19942635297775269, "learning_rate": 4.9985268680522326e-05, "loss": 0.9538228511810303, "step": 824 }, { "epoch": 1.0122699386503067, "grad_norm": 0.17560924589633942, "learning_rate": 4.998509589776978e-05, "loss": 0.9249204397201538, "step": 825 }, { "epoch": 1.0134969325153373, "grad_norm": 0.1779119074344635, "learning_rate": 4.9984922107939255e-05, "loss": 0.882927417755127, "step": 826 }, { "epoch": 1.014723926380368, "grad_norm": 0.17338088154792786, "learning_rate": 4.99847473110378e-05, "loss": 0.954800009727478, "step": 827 }, { "epoch": 1.0159509202453987, "grad_norm": 0.17504006624221802, "learning_rate": 4.998457150707243e-05, "loss": 0.8615262508392334, "step": 828 }, { "epoch": 1.0171779141104293, "grad_norm": 0.17492976784706116, "learning_rate": 4.9984394696050245e-05, "loss": 0.8399525284767151, "step": 829 }, { "epoch": 1.01840490797546, "grad_norm": 0.1842600256204605, "learning_rate": 4.9984216877978366e-05, "loss": 1.0419448614120483, "step": 830 }, { "epoch": 1.019631901840491, "grad_norm": 0.16997100412845612, "learning_rate": 4.998403805286397e-05, "loss": 0.8578265905380249, "step": 831 }, { "epoch": 1.0208588957055216, "grad_norm": 0.2146667093038559, "learning_rate": 4.998385822071424e-05, "loss": 0.8514478802680969, "step": 832 }, { "epoch": 1.0220858895705522, "grad_norm": 0.18664850294589996, "learning_rate": 4.9983677381536454e-05, "loss": 0.9468450546264648, "step": 833 }, { "epoch": 1.023312883435583, "grad_norm": 0.18977423012256622, "learning_rate": 4.998349553533789e-05, "loss": 0.9018813371658325, "step": 834 }, { "epoch": 1.0245398773006136, "grad_norm": 0.2647550702095032, "learning_rate": 4.998331268212587e-05, "loss": 0.9554318189620972, "step": 835 }, { "epoch": 1.0257668711656442, "grad_norm": 0.171381413936615, "learning_rate": 4.998312882190777e-05, "loss": 0.9422781467437744, "step": 836 }, { "epoch": 1.026993865030675, "grad_norm": 0.18868215382099152, "learning_rate": 4.9982943954691e-05, "loss": 0.8463807106018066, "step": 837 }, { "epoch": 1.0282208588957056, "grad_norm": 0.1572667807340622, "learning_rate": 4.9982758080483014e-05, "loss": 0.9241183996200562, "step": 838 }, { "epoch": 1.0294478527607362, "grad_norm": 0.1927393674850464, "learning_rate": 4.998257119929131e-05, "loss": 0.8769881725311279, "step": 839 }, { "epoch": 1.030674846625767, "grad_norm": 0.15615084767341614, "learning_rate": 4.998238331112341e-05, "loss": 0.9645507335662842, "step": 840 }, { "epoch": 1.0319018404907976, "grad_norm": 0.2330227643251419, "learning_rate": 4.998219441598689e-05, "loss": 0.8955655097961426, "step": 841 }, { "epoch": 1.0331288343558283, "grad_norm": 0.1678479015827179, "learning_rate": 4.998200451388936e-05, "loss": 0.8918401002883911, "step": 842 }, { "epoch": 1.034355828220859, "grad_norm": 0.2001941055059433, "learning_rate": 4.9981813604838484e-05, "loss": 0.92240971326828, "step": 843 }, { "epoch": 1.0355828220858896, "grad_norm": 0.19572116434574127, "learning_rate": 4.998162168884195e-05, "loss": 1.0301885604858398, "step": 844 }, { "epoch": 1.0368098159509203, "grad_norm": 0.16251637041568756, "learning_rate": 4.99814287659075e-05, "loss": 1.0963749885559082, "step": 845 }, { "epoch": 1.038036809815951, "grad_norm": 0.21147343516349792, "learning_rate": 4.99812348360429e-05, "loss": 0.9228247404098511, "step": 846 }, { "epoch": 1.0392638036809816, "grad_norm": 0.19230486452579498, "learning_rate": 4.998103989925597e-05, "loss": 0.8990166783332825, "step": 847 }, { "epoch": 1.0404907975460123, "grad_norm": 0.18830150365829468, "learning_rate": 4.9980843955554577e-05, "loss": 1.0715434551239014, "step": 848 }, { "epoch": 1.041717791411043, "grad_norm": 0.1890118420124054, "learning_rate": 4.998064700494661e-05, "loss": 0.9399991035461426, "step": 849 }, { "epoch": 1.0429447852760736, "grad_norm": 0.2539274990558624, "learning_rate": 4.9980449047440005e-05, "loss": 1.0276734828948975, "step": 850 }, { "epoch": 1.0441717791411043, "grad_norm": 0.21385012567043304, "learning_rate": 4.998025008304275e-05, "loss": 0.944549560546875, "step": 851 }, { "epoch": 1.045398773006135, "grad_norm": 0.21141473948955536, "learning_rate": 4.9980050111762856e-05, "loss": 0.8919497728347778, "step": 852 }, { "epoch": 1.0466257668711656, "grad_norm": 0.16924583911895752, "learning_rate": 4.9979849133608395e-05, "loss": 1.02620530128479, "step": 853 }, { "epoch": 1.0478527607361963, "grad_norm": 0.15970051288604736, "learning_rate": 4.997964714858745e-05, "loss": 0.9994151592254639, "step": 854 }, { "epoch": 1.049079754601227, "grad_norm": 0.20956160128116608, "learning_rate": 4.9979444156708175e-05, "loss": 0.806256890296936, "step": 855 }, { "epoch": 1.0503067484662576, "grad_norm": 0.1460399478673935, "learning_rate": 4.997924015797876e-05, "loss": 0.9205178022384644, "step": 856 }, { "epoch": 1.0515337423312883, "grad_norm": 0.18206287920475006, "learning_rate": 4.99790351524074e-05, "loss": 0.8680782914161682, "step": 857 }, { "epoch": 1.052760736196319, "grad_norm": 0.1734090894460678, "learning_rate": 4.997882914000239e-05, "loss": 1.0228880643844604, "step": 858 }, { "epoch": 1.0539877300613496, "grad_norm": 0.14811742305755615, "learning_rate": 4.9978622120772014e-05, "loss": 0.9824391603469849, "step": 859 }, { "epoch": 1.0552147239263803, "grad_norm": 0.15148505568504333, "learning_rate": 4.9978414094724624e-05, "loss": 0.9428403973579407, "step": 860 }, { "epoch": 1.056441717791411, "grad_norm": 0.17995113134384155, "learning_rate": 4.99782050618686e-05, "loss": 0.9623274803161621, "step": 861 }, { "epoch": 1.0576687116564418, "grad_norm": 0.17370010912418365, "learning_rate": 4.997799502221236e-05, "loss": 0.9799333214759827, "step": 862 }, { "epoch": 1.0588957055214725, "grad_norm": 0.2006457895040512, "learning_rate": 4.99777839757644e-05, "loss": 0.8451767563819885, "step": 863 }, { "epoch": 1.0601226993865032, "grad_norm": 0.16847147047519684, "learning_rate": 4.9977571922533197e-05, "loss": 0.8689521551132202, "step": 864 }, { "epoch": 1.0613496932515338, "grad_norm": 0.19943423569202423, "learning_rate": 4.99773588625273e-05, "loss": 0.8957946300506592, "step": 865 }, { "epoch": 1.0625766871165645, "grad_norm": 0.17353525757789612, "learning_rate": 4.9977144795755314e-05, "loss": 0.9135574102401733, "step": 866 }, { "epoch": 1.0638036809815952, "grad_norm": 0.1804380714893341, "learning_rate": 4.997692972222586e-05, "loss": 0.9338076710700989, "step": 867 }, { "epoch": 1.0650306748466258, "grad_norm": 0.21410247683525085, "learning_rate": 4.99767136419476e-05, "loss": 0.8560439348220825, "step": 868 }, { "epoch": 1.0662576687116565, "grad_norm": 0.18686075508594513, "learning_rate": 4.997649655492925e-05, "loss": 0.900361180305481, "step": 869 }, { "epoch": 1.0674846625766872, "grad_norm": 0.1746203899383545, "learning_rate": 4.997627846117957e-05, "loss": 0.9221999049186707, "step": 870 }, { "epoch": 1.0687116564417178, "grad_norm": 0.19177904725074768, "learning_rate": 4.9976059360707326e-05, "loss": 0.8867021799087524, "step": 871 }, { "epoch": 1.0699386503067485, "grad_norm": 0.20389696955680847, "learning_rate": 4.9975839253521376e-05, "loss": 0.7550114393234253, "step": 872 }, { "epoch": 1.0711656441717792, "grad_norm": 0.18040592968463898, "learning_rate": 4.997561813963056e-05, "loss": 0.8769679069519043, "step": 873 }, { "epoch": 1.0723926380368098, "grad_norm": 0.1760079711675644, "learning_rate": 4.9975396019043827e-05, "loss": 0.954287052154541, "step": 874 }, { "epoch": 1.0736196319018405, "grad_norm": 0.2436283528804779, "learning_rate": 4.99751728917701e-05, "loss": 0.7210538983345032, "step": 875 }, { "epoch": 1.0748466257668712, "grad_norm": 0.18974623084068298, "learning_rate": 4.9974948757818395e-05, "loss": 0.8592209815979004, "step": 876 }, { "epoch": 1.0760736196319018, "grad_norm": 0.2737760543823242, "learning_rate": 4.997472361719773e-05, "loss": 0.8340908288955688, "step": 877 }, { "epoch": 1.0773006134969325, "grad_norm": 0.1639503389596939, "learning_rate": 4.9974497469917195e-05, "loss": 0.9439669847488403, "step": 878 }, { "epoch": 1.0785276073619632, "grad_norm": 0.17427566647529602, "learning_rate": 4.997427031598588e-05, "loss": 0.8845298290252686, "step": 879 }, { "epoch": 1.0797546012269938, "grad_norm": 0.2078152894973755, "learning_rate": 4.997404215541297e-05, "loss": 0.8308533430099487, "step": 880 }, { "epoch": 1.0809815950920245, "grad_norm": 0.19694223999977112, "learning_rate": 4.997381298820765e-05, "loss": 0.8487362861633301, "step": 881 }, { "epoch": 1.0822085889570552, "grad_norm": 0.18540264666080475, "learning_rate": 4.997358281437915e-05, "loss": 0.869340181350708, "step": 882 }, { "epoch": 1.0834355828220859, "grad_norm": 0.23404045403003693, "learning_rate": 4.9973351633936755e-05, "loss": 0.8505808115005493, "step": 883 }, { "epoch": 1.0846625766871165, "grad_norm": 0.20780116319656372, "learning_rate": 4.997311944688978e-05, "loss": 0.9032593965530396, "step": 884 }, { "epoch": 1.0858895705521472, "grad_norm": 0.2057742029428482, "learning_rate": 4.997288625324759e-05, "loss": 0.8967803716659546, "step": 885 }, { "epoch": 1.0871165644171779, "grad_norm": 0.22534185647964478, "learning_rate": 4.997265205301957e-05, "loss": 0.9379975199699402, "step": 886 }, { "epoch": 1.0883435582822085, "grad_norm": 0.1628478765487671, "learning_rate": 4.997241684621519e-05, "loss": 0.966636061668396, "step": 887 }, { "epoch": 1.0895705521472392, "grad_norm": 0.18084381520748138, "learning_rate": 4.9972180632843896e-05, "loss": 0.9809795022010803, "step": 888 }, { "epoch": 1.0907975460122699, "grad_norm": 0.18842187523841858, "learning_rate": 4.9971943412915225e-05, "loss": 0.8881771564483643, "step": 889 }, { "epoch": 1.0920245398773005, "grad_norm": 0.233869269490242, "learning_rate": 4.997170518643874e-05, "loss": 0.7870899438858032, "step": 890 }, { "epoch": 1.0932515337423312, "grad_norm": 0.1819472461938858, "learning_rate": 4.997146595342404e-05, "loss": 0.8591310381889343, "step": 891 }, { "epoch": 1.0944785276073619, "grad_norm": 0.1636793464422226, "learning_rate": 4.9971225713880766e-05, "loss": 1.0031218528747559, "step": 892 }, { "epoch": 1.0957055214723925, "grad_norm": 0.1657864898443222, "learning_rate": 4.9970984467818603e-05, "loss": 0.9159972071647644, "step": 893 }, { "epoch": 1.0969325153374232, "grad_norm": 0.17046967148780823, "learning_rate": 4.997074221524728e-05, "loss": 1.0405173301696777, "step": 894 }, { "epoch": 1.098159509202454, "grad_norm": 0.18485808372497559, "learning_rate": 4.997049895617655e-05, "loss": 1.0235645771026611, "step": 895 }, { "epoch": 1.0993865030674848, "grad_norm": 0.2029455453157425, "learning_rate": 4.997025469061624e-05, "loss": 0.8844905495643616, "step": 896 }, { "epoch": 1.1006134969325154, "grad_norm": 0.18018127977848053, "learning_rate": 4.9970009418576166e-05, "loss": 0.7983524799346924, "step": 897 }, { "epoch": 1.101840490797546, "grad_norm": 0.20646725594997406, "learning_rate": 4.996976314006623e-05, "loss": 0.8013898134231567, "step": 898 }, { "epoch": 1.1030674846625768, "grad_norm": 0.1593887060880661, "learning_rate": 4.996951585509636e-05, "loss": 0.9482541084289551, "step": 899 }, { "epoch": 1.1042944785276074, "grad_norm": 0.15874724090099335, "learning_rate": 4.9969267563676526e-05, "loss": 0.9711331129074097, "step": 900 }, { "epoch": 1.105521472392638, "grad_norm": 0.17873366177082062, "learning_rate": 4.9969018265816725e-05, "loss": 0.911928117275238, "step": 901 }, { "epoch": 1.1067484662576688, "grad_norm": 0.1544751077890396, "learning_rate": 4.996876796152702e-05, "loss": 0.8429673910140991, "step": 902 }, { "epoch": 1.1079754601226994, "grad_norm": 0.21346110105514526, "learning_rate": 4.9968516650817485e-05, "loss": 1.0316723585128784, "step": 903 }, { "epoch": 1.10920245398773, "grad_norm": 0.21061743795871735, "learning_rate": 4.9968264333698254e-05, "loss": 0.9436956644058228, "step": 904 }, { "epoch": 1.1104294478527608, "grad_norm": 0.19813691079616547, "learning_rate": 4.99680110101795e-05, "loss": 0.8727914094924927, "step": 905 }, { "epoch": 1.1116564417177914, "grad_norm": 0.15448185801506042, "learning_rate": 4.996775668027144e-05, "loss": 0.984161376953125, "step": 906 }, { "epoch": 1.112883435582822, "grad_norm": 1.444457769393921, "learning_rate": 4.996750134398431e-05, "loss": 0.7797319293022156, "step": 907 }, { "epoch": 1.1141104294478528, "grad_norm": 0.2197367548942566, "learning_rate": 4.996724500132842e-05, "loss": 0.7754565477371216, "step": 908 }, { "epoch": 1.1153374233128834, "grad_norm": 0.17264893651008606, "learning_rate": 4.996698765231409e-05, "loss": 0.8168716430664062, "step": 909 }, { "epoch": 1.116564417177914, "grad_norm": 0.16804663836956024, "learning_rate": 4.996672929695169e-05, "loss": 0.8931959867477417, "step": 910 }, { "epoch": 1.1177914110429448, "grad_norm": 0.1793871521949768, "learning_rate": 4.9966469935251644e-05, "loss": 0.9204267263412476, "step": 911 }, { "epoch": 1.1190184049079754, "grad_norm": 0.1588888317346573, "learning_rate": 4.9966209567224396e-05, "loss": 1.0511765480041504, "step": 912 }, { "epoch": 1.120245398773006, "grad_norm": 0.16447052359580994, "learning_rate": 4.9965948192880454e-05, "loss": 0.9583656787872314, "step": 913 }, { "epoch": 1.1214723926380368, "grad_norm": 0.19050590693950653, "learning_rate": 4.996568581223034e-05, "loss": 0.950882613658905, "step": 914 }, { "epoch": 1.1226993865030674, "grad_norm": 0.19009937345981598, "learning_rate": 4.996542242528464e-05, "loss": 0.9432741403579712, "step": 915 }, { "epoch": 1.123926380368098, "grad_norm": 0.1471281200647354, "learning_rate": 4.996515803205396e-05, "loss": 0.9461722373962402, "step": 916 }, { "epoch": 1.1251533742331288, "grad_norm": 0.1735878735780716, "learning_rate": 4.996489263254896e-05, "loss": 0.9530206918716431, "step": 917 }, { "epoch": 1.1263803680981594, "grad_norm": 0.14906206727027893, "learning_rate": 4.9964626226780356e-05, "loss": 0.981113612651825, "step": 918 }, { "epoch": 1.1276073619631901, "grad_norm": 0.16395413875579834, "learning_rate": 4.9964358814758855e-05, "loss": 0.939933717250824, "step": 919 }, { "epoch": 1.1288343558282208, "grad_norm": 0.19772851467132568, "learning_rate": 4.996409039649525e-05, "loss": 0.9201581478118896, "step": 920 }, { "epoch": 1.1300613496932514, "grad_norm": 0.2113935351371765, "learning_rate": 4.996382097200037e-05, "loss": 0.7705509066581726, "step": 921 }, { "epoch": 1.1312883435582821, "grad_norm": 0.17428363859653473, "learning_rate": 4.996355054128506e-05, "loss": 0.9796975255012512, "step": 922 }, { "epoch": 1.132515337423313, "grad_norm": 0.33713966608047485, "learning_rate": 4.9963279104360225e-05, "loss": 0.887941837310791, "step": 923 }, { "epoch": 1.1337423312883437, "grad_norm": 0.1998036652803421, "learning_rate": 4.996300666123682e-05, "loss": 1.018757939338684, "step": 924 }, { "epoch": 1.1349693251533743, "grad_norm": 0.18734171986579895, "learning_rate": 4.996273321192579e-05, "loss": 0.8301855325698853, "step": 925 }, { "epoch": 1.136196319018405, "grad_norm": 0.1573547124862671, "learning_rate": 4.99624587564382e-05, "loss": 0.9977589845657349, "step": 926 }, { "epoch": 1.1374233128834357, "grad_norm": 0.18706560134887695, "learning_rate": 4.9962183294785085e-05, "loss": 0.9003831148147583, "step": 927 }, { "epoch": 1.1386503067484663, "grad_norm": 0.23741434514522552, "learning_rate": 4.9961906826977555e-05, "loss": 0.9515665173530579, "step": 928 }, { "epoch": 1.139877300613497, "grad_norm": 0.16731296479701996, "learning_rate": 4.996162935302676e-05, "loss": 1.0333008766174316, "step": 929 }, { "epoch": 1.1411042944785277, "grad_norm": 0.1866004765033722, "learning_rate": 4.996135087294388e-05, "loss": 0.9106324315071106, "step": 930 }, { "epoch": 1.1423312883435583, "grad_norm": 0.20279163122177124, "learning_rate": 4.9961071386740125e-05, "loss": 0.7662670016288757, "step": 931 }, { "epoch": 1.143558282208589, "grad_norm": 0.15475070476531982, "learning_rate": 4.9960790894426776e-05, "loss": 0.8718991279602051, "step": 932 }, { "epoch": 1.1447852760736197, "grad_norm": 0.18648572266101837, "learning_rate": 4.9960509396015145e-05, "loss": 0.9528915286064148, "step": 933 }, { "epoch": 1.1460122699386504, "grad_norm": 0.19758549332618713, "learning_rate": 4.996022689151657e-05, "loss": 0.7939724922180176, "step": 934 }, { "epoch": 1.147239263803681, "grad_norm": 0.1957622915506363, "learning_rate": 4.995994338094243e-05, "loss": 0.7855620980262756, "step": 935 }, { "epoch": 1.1484662576687117, "grad_norm": 0.3346389830112457, "learning_rate": 4.995965886430417e-05, "loss": 0.9882651567459106, "step": 936 }, { "epoch": 1.1496932515337424, "grad_norm": 0.17514066398143768, "learning_rate": 4.995937334161324e-05, "loss": 0.8928797841072083, "step": 937 }, { "epoch": 1.150920245398773, "grad_norm": 0.18980808556079865, "learning_rate": 4.9959086812881164e-05, "loss": 0.948493480682373, "step": 938 }, { "epoch": 1.1521472392638037, "grad_norm": 0.19660504162311554, "learning_rate": 4.995879927811948e-05, "loss": 0.9865711331367493, "step": 939 }, { "epoch": 1.1533742331288344, "grad_norm": 0.19233614206314087, "learning_rate": 4.995851073733978e-05, "loss": 0.9162279367446899, "step": 940 }, { "epoch": 1.154601226993865, "grad_norm": 0.20550848543643951, "learning_rate": 4.99582211905537e-05, "loss": 0.9391956329345703, "step": 941 }, { "epoch": 1.1558282208588957, "grad_norm": 0.18902291357517242, "learning_rate": 4.99579306377729e-05, "loss": 0.9069676399230957, "step": 942 }, { "epoch": 1.1570552147239264, "grad_norm": 0.17482182383537292, "learning_rate": 4.995763907900911e-05, "loss": 0.8593067526817322, "step": 943 }, { "epoch": 1.158282208588957, "grad_norm": 0.1863706409931183, "learning_rate": 4.9957346514274064e-05, "loss": 0.8647783994674683, "step": 944 }, { "epoch": 1.1595092024539877, "grad_norm": 0.17462024092674255, "learning_rate": 4.995705294357955e-05, "loss": 0.898531436920166, "step": 945 }, { "epoch": 1.1607361963190184, "grad_norm": 0.214217409491539, "learning_rate": 4.995675836693743e-05, "loss": 0.9573882818222046, "step": 946 }, { "epoch": 1.161963190184049, "grad_norm": 0.18728120625019073, "learning_rate": 4.995646278435955e-05, "loss": 0.9059288501739502, "step": 947 }, { "epoch": 1.1631901840490797, "grad_norm": 0.1746053397655487, "learning_rate": 4.995616619585784e-05, "loss": 0.9775527715682983, "step": 948 }, { "epoch": 1.1644171779141104, "grad_norm": 0.1668967455625534, "learning_rate": 4.9955868601444235e-05, "loss": 0.9878149032592773, "step": 949 }, { "epoch": 1.165644171779141, "grad_norm": 0.1910443902015686, "learning_rate": 4.9955570001130746e-05, "loss": 0.8854741454124451, "step": 950 }, { "epoch": 1.1668711656441717, "grad_norm": 0.17224320769309998, "learning_rate": 4.995527039492941e-05, "loss": 0.8447737693786621, "step": 951 }, { "epoch": 1.1680981595092024, "grad_norm": 0.3415190279483795, "learning_rate": 4.99549697828523e-05, "loss": 0.9457880258560181, "step": 952 }, { "epoch": 1.169325153374233, "grad_norm": 0.19370616972446442, "learning_rate": 4.995466816491153e-05, "loss": 0.9201220273971558, "step": 953 }, { "epoch": 1.1705521472392637, "grad_norm": 0.18859557807445526, "learning_rate": 4.995436554111926e-05, "loss": 0.912129282951355, "step": 954 }, { "epoch": 1.1717791411042944, "grad_norm": 0.1855250746011734, "learning_rate": 4.9954061911487686e-05, "loss": 1.1316628456115723, "step": 955 }, { "epoch": 1.173006134969325, "grad_norm": 0.18055830895900726, "learning_rate": 4.995375727602905e-05, "loss": 0.9989950656890869, "step": 956 }, { "epoch": 1.1742331288343557, "grad_norm": 0.16536639630794525, "learning_rate": 4.995345163475563e-05, "loss": 0.9005239009857178, "step": 957 }, { "epoch": 1.1754601226993864, "grad_norm": 0.15268491208553314, "learning_rate": 4.9953144987679734e-05, "loss": 0.9843144416809082, "step": 958 }, { "epoch": 1.1766871165644173, "grad_norm": 0.1884283721446991, "learning_rate": 4.995283733481374e-05, "loss": 0.8874081373214722, "step": 959 }, { "epoch": 1.177914110429448, "grad_norm": 0.253836065530777, "learning_rate": 4.995252867617004e-05, "loss": 0.9611265063285828, "step": 960 }, { "epoch": 1.1791411042944786, "grad_norm": 0.23772500455379486, "learning_rate": 4.9952219011761084e-05, "loss": 0.750025749206543, "step": 961 }, { "epoch": 1.1803680981595093, "grad_norm": 0.17383912205696106, "learning_rate": 4.995190834159933e-05, "loss": 0.8785930871963501, "step": 962 }, { "epoch": 1.18159509202454, "grad_norm": 0.2006940394639969, "learning_rate": 4.995159666569733e-05, "loss": 1.0519996881484985, "step": 963 }, { "epoch": 1.1828220858895706, "grad_norm": 0.19629088044166565, "learning_rate": 4.995128398406762e-05, "loss": 0.8588075637817383, "step": 964 }, { "epoch": 1.1840490797546013, "grad_norm": 0.2147558480501175, "learning_rate": 4.995097029672282e-05, "loss": 0.843407154083252, "step": 965 }, { "epoch": 1.185276073619632, "grad_norm": 0.14338521659374237, "learning_rate": 4.995065560367557e-05, "loss": 1.1033785343170166, "step": 966 }, { "epoch": 1.1865030674846626, "grad_norm": 0.1498812586069107, "learning_rate": 4.995033990493856e-05, "loss": 1.0449490547180176, "step": 967 }, { "epoch": 1.1877300613496933, "grad_norm": 0.19823846220970154, "learning_rate": 4.9950023200524504e-05, "loss": 0.9530608654022217, "step": 968 }, { "epoch": 1.188957055214724, "grad_norm": 0.19060662388801575, "learning_rate": 4.994970549044617e-05, "loss": 0.9818426966667175, "step": 969 }, { "epoch": 1.1901840490797546, "grad_norm": 0.14769811928272247, "learning_rate": 4.994938677471637e-05, "loss": 0.9696356058120728, "step": 970 }, { "epoch": 1.1914110429447853, "grad_norm": 0.17662683129310608, "learning_rate": 4.9949067053347947e-05, "loss": 0.9001758098602295, "step": 971 }, { "epoch": 1.192638036809816, "grad_norm": 0.16437295079231262, "learning_rate": 4.994874632635379e-05, "loss": 1.0215225219726562, "step": 972 }, { "epoch": 1.1938650306748466, "grad_norm": 0.19976036250591278, "learning_rate": 4.994842459374682e-05, "loss": 0.8425062894821167, "step": 973 }, { "epoch": 1.1950920245398773, "grad_norm": 0.17059281468391418, "learning_rate": 4.994810185554001e-05, "loss": 0.8922945261001587, "step": 974 }, { "epoch": 1.196319018404908, "grad_norm": 0.1742866486310959, "learning_rate": 4.9947778111746376e-05, "loss": 1.0632004737854004, "step": 975 }, { "epoch": 1.1975460122699386, "grad_norm": 0.1750059574842453, "learning_rate": 4.994745336237895e-05, "loss": 0.964726448059082, "step": 976 }, { "epoch": 1.1987730061349693, "grad_norm": 0.17557790875434875, "learning_rate": 4.994712760745084e-05, "loss": 1.0713610649108887, "step": 977 }, { "epoch": 1.2, "grad_norm": 0.1743466705083847, "learning_rate": 4.994680084697516e-05, "loss": 0.9185527563095093, "step": 978 }, { "epoch": 1.2012269938650306, "grad_norm": 0.2254081517457962, "learning_rate": 4.994647308096509e-05, "loss": 0.9607516527175903, "step": 979 }, { "epoch": 1.2024539877300613, "grad_norm": 0.16291455924510956, "learning_rate": 4.994614430943384e-05, "loss": 0.9997134208679199, "step": 980 }, { "epoch": 1.203680981595092, "grad_norm": 0.14439992606639862, "learning_rate": 4.994581453239466e-05, "loss": 0.9715535640716553, "step": 981 }, { "epoch": 1.2049079754601226, "grad_norm": 0.21888069808483124, "learning_rate": 4.994548374986085e-05, "loss": 0.7919667959213257, "step": 982 }, { "epoch": 1.2061349693251533, "grad_norm": 0.2148587554693222, "learning_rate": 4.994515196184573e-05, "loss": 0.8938875198364258, "step": 983 }, { "epoch": 1.207361963190184, "grad_norm": 0.18126434087753296, "learning_rate": 4.994481916836269e-05, "loss": 0.9488588571548462, "step": 984 }, { "epoch": 1.2085889570552146, "grad_norm": 0.16359232366085052, "learning_rate": 4.994448536942512e-05, "loss": 0.9237864017486572, "step": 985 }, { "epoch": 1.2098159509202455, "grad_norm": 0.18861369788646698, "learning_rate": 4.9944150565046496e-05, "loss": 0.8947683572769165, "step": 986 }, { "epoch": 1.2110429447852762, "grad_norm": 0.18566200137138367, "learning_rate": 4.994381475524031e-05, "loss": 0.9830033779144287, "step": 987 }, { "epoch": 1.2122699386503069, "grad_norm": 0.16420194506645203, "learning_rate": 4.9943477940020086e-05, "loss": 0.993489146232605, "step": 988 }, { "epoch": 1.2134969325153375, "grad_norm": 0.22190405428409576, "learning_rate": 4.994314011939941e-05, "loss": 0.8625966906547546, "step": 989 }, { "epoch": 1.2147239263803682, "grad_norm": 0.19591230154037476, "learning_rate": 4.994280129339189e-05, "loss": 0.7712403535842896, "step": 990 }, { "epoch": 1.2159509202453989, "grad_norm": 0.18596316874027252, "learning_rate": 4.9942461462011206e-05, "loss": 0.7834197878837585, "step": 991 }, { "epoch": 1.2171779141104295, "grad_norm": 0.1753762662410736, "learning_rate": 4.9942120625271027e-05, "loss": 0.864272952079773, "step": 992 }, { "epoch": 1.2184049079754602, "grad_norm": 0.18955479562282562, "learning_rate": 4.99417787831851e-05, "loss": 0.8996263742446899, "step": 993 }, { "epoch": 1.2196319018404909, "grad_norm": 0.18759360909461975, "learning_rate": 4.994143593576721e-05, "loss": 0.8980849385261536, "step": 994 }, { "epoch": 1.2208588957055215, "grad_norm": 0.16138789057731628, "learning_rate": 4.994109208303117e-05, "loss": 0.8556692600250244, "step": 995 }, { "epoch": 1.2220858895705522, "grad_norm": 0.14422741532325745, "learning_rate": 4.9940747224990846e-05, "loss": 0.8877030611038208, "step": 996 }, { "epoch": 1.2233128834355829, "grad_norm": 0.16205506026744843, "learning_rate": 4.994040136166014e-05, "loss": 0.8970627784729004, "step": 997 }, { "epoch": 1.2245398773006135, "grad_norm": 0.20970997214317322, "learning_rate": 4.994005449305298e-05, "loss": 0.9829838871955872, "step": 998 }, { "epoch": 1.2257668711656442, "grad_norm": 0.18686479330062866, "learning_rate": 4.993970661918335e-05, "loss": 0.8872844576835632, "step": 999 }, { "epoch": 1.2269938650306749, "grad_norm": 0.17308223247528076, "learning_rate": 4.9939357740065284e-05, "loss": 0.9612942934036255, "step": 1000 }, { "epoch": 1.2282208588957055, "grad_norm": 0.21058037877082825, "learning_rate": 4.9939007855712836e-05, "loss": 0.9256113171577454, "step": 1001 }, { "epoch": 1.2294478527607362, "grad_norm": 0.16261765360832214, "learning_rate": 4.993865696614011e-05, "loss": 0.9644488096237183, "step": 1002 }, { "epoch": 1.2306748466257669, "grad_norm": 0.1749044805765152, "learning_rate": 4.993830507136125e-05, "loss": 0.8825243711471558, "step": 1003 }, { "epoch": 1.2319018404907975, "grad_norm": 0.1651807278394699, "learning_rate": 4.9937952171390426e-05, "loss": 0.9710007905960083, "step": 1004 }, { "epoch": 1.2331288343558282, "grad_norm": 0.1684868186712265, "learning_rate": 4.993759826624189e-05, "loss": 0.9900846481323242, "step": 1005 }, { "epoch": 1.2343558282208589, "grad_norm": 0.17447324097156525, "learning_rate": 4.993724335592988e-05, "loss": 0.8984297513961792, "step": 1006 }, { "epoch": 1.2355828220858895, "grad_norm": 0.17285595834255219, "learning_rate": 4.9936887440468724e-05, "loss": 0.9773588180541992, "step": 1007 }, { "epoch": 1.2368098159509202, "grad_norm": 0.17950083315372467, "learning_rate": 4.993653051987276e-05, "loss": 1.0576348304748535, "step": 1008 }, { "epoch": 1.2380368098159509, "grad_norm": 0.16765305399894714, "learning_rate": 4.993617259415636e-05, "loss": 0.9235082864761353, "step": 1009 }, { "epoch": 1.2392638036809815, "grad_norm": 0.1997142732143402, "learning_rate": 4.993581366333398e-05, "loss": 0.9239630103111267, "step": 1010 }, { "epoch": 1.2404907975460122, "grad_norm": 0.16271725296974182, "learning_rate": 4.993545372742006e-05, "loss": 0.950951337814331, "step": 1011 }, { "epoch": 1.2417177914110429, "grad_norm": 0.1758057326078415, "learning_rate": 4.993509278642911e-05, "loss": 1.05390465259552, "step": 1012 }, { "epoch": 1.2429447852760735, "grad_norm": 0.2185579091310501, "learning_rate": 4.99347308403757e-05, "loss": 0.9211970567703247, "step": 1013 }, { "epoch": 1.2441717791411042, "grad_norm": 0.22054867446422577, "learning_rate": 4.9934367889274396e-05, "loss": 0.8406109809875488, "step": 1014 }, { "epoch": 1.2453987730061349, "grad_norm": 0.1761612594127655, "learning_rate": 4.9934003933139845e-05, "loss": 0.8685232996940613, "step": 1015 }, { "epoch": 1.2466257668711656, "grad_norm": 0.17665913701057434, "learning_rate": 4.993363897198671e-05, "loss": 0.9134083986282349, "step": 1016 }, { "epoch": 1.2478527607361962, "grad_norm": 0.18350182473659515, "learning_rate": 4.993327300582969e-05, "loss": 0.9086629152297974, "step": 1017 }, { "epoch": 1.2490797546012269, "grad_norm": 0.1757018119096756, "learning_rate": 4.993290603468356e-05, "loss": 0.9537110328674316, "step": 1018 }, { "epoch": 1.2503067484662576, "grad_norm": 0.18542718887329102, "learning_rate": 4.993253805856309e-05, "loss": 0.9146935939788818, "step": 1019 }, { "epoch": 1.2515337423312882, "grad_norm": 0.16147474944591522, "learning_rate": 4.993216907748313e-05, "loss": 0.9635133743286133, "step": 1020 }, { "epoch": 1.252760736196319, "grad_norm": 0.17807425558567047, "learning_rate": 4.993179909145853e-05, "loss": 0.7939913272857666, "step": 1021 }, { "epoch": 1.2539877300613496, "grad_norm": 0.18043196201324463, "learning_rate": 4.993142810050423e-05, "loss": 1.0175446271896362, "step": 1022 }, { "epoch": 1.2552147239263804, "grad_norm": 0.21780681610107422, "learning_rate": 4.993105610463516e-05, "loss": 0.8579457998275757, "step": 1023 }, { "epoch": 1.2564417177914111, "grad_norm": 0.18463794887065887, "learning_rate": 4.9930683103866324e-05, "loss": 0.9604005813598633, "step": 1024 }, { "epoch": 1.2576687116564418, "grad_norm": 0.17024284601211548, "learning_rate": 4.993030909821277e-05, "loss": 1.0922260284423828, "step": 1025 }, { "epoch": 1.2588957055214725, "grad_norm": 0.19428075850009918, "learning_rate": 4.9929934087689544e-05, "loss": 0.8758835792541504, "step": 1026 }, { "epoch": 1.2601226993865031, "grad_norm": 0.1656341850757599, "learning_rate": 4.992955807231179e-05, "loss": 0.8799440860748291, "step": 1027 }, { "epoch": 1.2613496932515338, "grad_norm": 0.1700543910264969, "learning_rate": 4.9929181052094645e-05, "loss": 0.9844333529472351, "step": 1028 }, { "epoch": 1.2625766871165645, "grad_norm": 0.24296683073043823, "learning_rate": 4.992880302705331e-05, "loss": 0.8723282814025879, "step": 1029 }, { "epoch": 1.2638036809815951, "grad_norm": 0.20251022279262543, "learning_rate": 4.992842399720302e-05, "loss": 1.0410882234573364, "step": 1030 }, { "epoch": 1.2650306748466258, "grad_norm": 0.21203359961509705, "learning_rate": 4.992804396255907e-05, "loss": 0.8603835701942444, "step": 1031 }, { "epoch": 1.2662576687116565, "grad_norm": 0.1934172660112381, "learning_rate": 4.992766292313675e-05, "loss": 1.0839803218841553, "step": 1032 }, { "epoch": 1.2674846625766871, "grad_norm": 0.1950322836637497, "learning_rate": 4.992728087895144e-05, "loss": 0.8867466449737549, "step": 1033 }, { "epoch": 1.2687116564417178, "grad_norm": 0.19874346256256104, "learning_rate": 4.992689783001854e-05, "loss": 0.8085262775421143, "step": 1034 }, { "epoch": 1.2699386503067485, "grad_norm": 0.17692801356315613, "learning_rate": 4.9926513776353475e-05, "loss": 0.9929046034812927, "step": 1035 }, { "epoch": 1.2711656441717791, "grad_norm": 0.1758522391319275, "learning_rate": 4.992612871797173e-05, "loss": 0.8156089782714844, "step": 1036 }, { "epoch": 1.2723926380368098, "grad_norm": 0.19482409954071045, "learning_rate": 4.992574265488883e-05, "loss": 0.8959323763847351, "step": 1037 }, { "epoch": 1.2736196319018405, "grad_norm": 0.18325966596603394, "learning_rate": 4.992535558712034e-05, "loss": 0.9411832690238953, "step": 1038 }, { "epoch": 1.2748466257668711, "grad_norm": 0.1651957482099533, "learning_rate": 4.992496751468185e-05, "loss": 1.1345562934875488, "step": 1039 }, { "epoch": 1.2760736196319018, "grad_norm": 0.2071172297000885, "learning_rate": 4.992457843758901e-05, "loss": 1.040527582168579, "step": 1040 }, { "epoch": 1.2773006134969325, "grad_norm": 0.1585303097963333, "learning_rate": 4.9924188355857494e-05, "loss": 0.8893647789955139, "step": 1041 }, { "epoch": 1.2785276073619631, "grad_norm": 0.1858893781900406, "learning_rate": 4.992379726950304e-05, "loss": 0.9167636036872864, "step": 1042 }, { "epoch": 1.2797546012269938, "grad_norm": 0.17721650004386902, "learning_rate": 4.99234051785414e-05, "loss": 0.9651562571525574, "step": 1043 }, { "epoch": 1.2809815950920245, "grad_norm": 0.20434613525867462, "learning_rate": 4.9923012082988374e-05, "loss": 0.9494757652282715, "step": 1044 }, { "epoch": 1.2822085889570551, "grad_norm": 0.21380354464054108, "learning_rate": 4.992261798285982e-05, "loss": 0.8322881460189819, "step": 1045 }, { "epoch": 1.2834355828220858, "grad_norm": 0.18785732984542847, "learning_rate": 4.992222287817162e-05, "loss": 0.8540229797363281, "step": 1046 }, { "epoch": 1.2846625766871167, "grad_norm": 0.20361781120300293, "learning_rate": 4.992182676893969e-05, "loss": 0.8460529446601868, "step": 1047 }, { "epoch": 1.2858895705521474, "grad_norm": 0.18874792754650116, "learning_rate": 4.992142965518001e-05, "loss": 0.9283848404884338, "step": 1048 }, { "epoch": 1.287116564417178, "grad_norm": 0.1750839352607727, "learning_rate": 4.992103153690857e-05, "loss": 0.9073951244354248, "step": 1049 }, { "epoch": 1.2883435582822087, "grad_norm": 0.17138782143592834, "learning_rate": 4.992063241414143e-05, "loss": 0.9176533222198486, "step": 1050 }, { "epoch": 1.2895705521472394, "grad_norm": 0.1619250774383545, "learning_rate": 4.992023228689467e-05, "loss": 0.9552910327911377, "step": 1051 }, { "epoch": 1.29079754601227, "grad_norm": 0.1691872477531433, "learning_rate": 4.9919831155184426e-05, "loss": 0.8672565221786499, "step": 1052 }, { "epoch": 1.2920245398773007, "grad_norm": 0.1688535511493683, "learning_rate": 4.991942901902686e-05, "loss": 0.8998371958732605, "step": 1053 }, { "epoch": 1.2932515337423314, "grad_norm": 0.1993398368358612, "learning_rate": 4.991902587843819e-05, "loss": 0.7158814668655396, "step": 1054 }, { "epoch": 1.294478527607362, "grad_norm": 0.1731054037809372, "learning_rate": 4.9918621733434646e-05, "loss": 0.8957094550132751, "step": 1055 }, { "epoch": 1.2957055214723927, "grad_norm": 0.20693030953407288, "learning_rate": 4.991821658403253e-05, "loss": 0.8478323221206665, "step": 1056 }, { "epoch": 1.2969325153374234, "grad_norm": 0.16395339369773865, "learning_rate": 4.991781043024818e-05, "loss": 0.916109025478363, "step": 1057 }, { "epoch": 1.298159509202454, "grad_norm": 0.16140156984329224, "learning_rate": 4.991740327209795e-05, "loss": 0.8189965486526489, "step": 1058 }, { "epoch": 1.2993865030674847, "grad_norm": 0.2052025943994522, "learning_rate": 4.9916995109598274e-05, "loss": 0.8088475465774536, "step": 1059 }, { "epoch": 1.3006134969325154, "grad_norm": 0.18414488434791565, "learning_rate": 4.991658594276558e-05, "loss": 0.9949004650115967, "step": 1060 }, { "epoch": 1.301840490797546, "grad_norm": 0.1646610051393509, "learning_rate": 4.991617577161637e-05, "loss": 0.9063264727592468, "step": 1061 }, { "epoch": 1.3030674846625767, "grad_norm": 0.21607442200183868, "learning_rate": 4.9915764596167195e-05, "loss": 0.8949761390686035, "step": 1062 }, { "epoch": 1.3042944785276074, "grad_norm": 0.17027616500854492, "learning_rate": 4.991535241643459e-05, "loss": 0.8895565867424011, "step": 1063 }, { "epoch": 1.305521472392638, "grad_norm": 0.21171486377716064, "learning_rate": 4.99149392324352e-05, "loss": 0.8791182637214661, "step": 1064 }, { "epoch": 1.3067484662576687, "grad_norm": 0.17898984253406525, "learning_rate": 4.991452504418567e-05, "loss": 0.9444648027420044, "step": 1065 }, { "epoch": 1.3079754601226994, "grad_norm": 0.20688410103321075, "learning_rate": 4.991410985170269e-05, "loss": 0.9440363645553589, "step": 1066 }, { "epoch": 1.30920245398773, "grad_norm": 0.17011263966560364, "learning_rate": 4.9913693655003e-05, "loss": 0.872612476348877, "step": 1067 }, { "epoch": 1.3104294478527607, "grad_norm": 0.22123154997825623, "learning_rate": 4.991327645410337e-05, "loss": 0.8139020204544067, "step": 1068 }, { "epoch": 1.3116564417177914, "grad_norm": 0.2029227614402771, "learning_rate": 4.991285824902063e-05, "loss": 0.8186708092689514, "step": 1069 }, { "epoch": 1.312883435582822, "grad_norm": 0.20076178014278412, "learning_rate": 4.991243903977163e-05, "loss": 0.81966632604599, "step": 1070 }, { "epoch": 1.3141104294478527, "grad_norm": 0.21211187541484833, "learning_rate": 4.991201882637325e-05, "loss": 0.9460061192512512, "step": 1071 }, { "epoch": 1.3153374233128834, "grad_norm": 0.1598248928785324, "learning_rate": 4.9911597608842455e-05, "loss": 1.0563757419586182, "step": 1072 }, { "epoch": 1.316564417177914, "grad_norm": 0.21523648500442505, "learning_rate": 4.9911175387196205e-05, "loss": 0.8427826166152954, "step": 1073 }, { "epoch": 1.3177914110429447, "grad_norm": 0.16503562033176422, "learning_rate": 4.991075216145153e-05, "loss": 0.8680646419525146, "step": 1074 }, { "epoch": 1.3190184049079754, "grad_norm": 0.1715267449617386, "learning_rate": 4.991032793162548e-05, "loss": 0.8438676595687866, "step": 1075 }, { "epoch": 1.320245398773006, "grad_norm": 0.169235497713089, "learning_rate": 4.990990269773516e-05, "loss": 0.8722925186157227, "step": 1076 }, { "epoch": 1.3214723926380367, "grad_norm": 0.18702587485313416, "learning_rate": 4.99094764597977e-05, "loss": 0.9735938310623169, "step": 1077 }, { "epoch": 1.3226993865030674, "grad_norm": 0.17994539439678192, "learning_rate": 4.99090492178303e-05, "loss": 1.1357438564300537, "step": 1078 }, { "epoch": 1.323926380368098, "grad_norm": 0.16433142125606537, "learning_rate": 4.990862097185015e-05, "loss": 0.8564401865005493, "step": 1079 }, { "epoch": 1.3251533742331287, "grad_norm": 0.16000774502754211, "learning_rate": 4.990819172187454e-05, "loss": 0.9662036895751953, "step": 1080 }, { "epoch": 1.3263803680981594, "grad_norm": 0.1707119643688202, "learning_rate": 4.990776146792076e-05, "loss": 1.0124988555908203, "step": 1081 }, { "epoch": 1.32760736196319, "grad_norm": 0.20684169232845306, "learning_rate": 4.9907330210006164e-05, "loss": 0.9282799959182739, "step": 1082 }, { "epoch": 1.3288343558282207, "grad_norm": 0.18298691511154175, "learning_rate": 4.9906897948148115e-05, "loss": 0.9747853875160217, "step": 1083 }, { "epoch": 1.3300613496932514, "grad_norm": 0.22072885930538177, "learning_rate": 4.990646468236405e-05, "loss": 0.9639520049095154, "step": 1084 }, { "epoch": 1.331288343558282, "grad_norm": 0.17817388474941254, "learning_rate": 4.990603041267143e-05, "loss": 0.8648935556411743, "step": 1085 }, { "epoch": 1.332515337423313, "grad_norm": 0.17207403481006622, "learning_rate": 4.9905595139087755e-05, "loss": 0.9178932905197144, "step": 1086 }, { "epoch": 1.3337423312883436, "grad_norm": 0.20243100821971893, "learning_rate": 4.990515886163059e-05, "loss": 1.024980068206787, "step": 1087 }, { "epoch": 1.3349693251533743, "grad_norm": 0.2063990980386734, "learning_rate": 4.990472158031748e-05, "loss": 0.871686577796936, "step": 1088 }, { "epoch": 1.336196319018405, "grad_norm": 0.20303654670715332, "learning_rate": 4.990428329516608e-05, "loss": 0.9161834716796875, "step": 1089 }, { "epoch": 1.3374233128834356, "grad_norm": 0.1944132149219513, "learning_rate": 4.9903844006194056e-05, "loss": 0.8572983145713806, "step": 1090 }, { "epoch": 1.3386503067484663, "grad_norm": 0.21101436018943787, "learning_rate": 4.9903403713419106e-05, "loss": 0.7803308963775635, "step": 1091 }, { "epoch": 1.339877300613497, "grad_norm": 0.16167840361595154, "learning_rate": 4.990296241685897e-05, "loss": 0.7813881635665894, "step": 1092 }, { "epoch": 1.3411042944785276, "grad_norm": 0.18547356128692627, "learning_rate": 4.990252011653145e-05, "loss": 0.8526263236999512, "step": 1093 }, { "epoch": 1.3423312883435583, "grad_norm": 0.23035338521003723, "learning_rate": 4.990207681245437e-05, "loss": 0.8263249397277832, "step": 1094 }, { "epoch": 1.343558282208589, "grad_norm": 0.16541273891925812, "learning_rate": 4.99016325046456e-05, "loss": 1.0483744144439697, "step": 1095 }, { "epoch": 1.3447852760736196, "grad_norm": 0.1755669116973877, "learning_rate": 4.9901187193123045e-05, "loss": 0.7771872282028198, "step": 1096 }, { "epoch": 1.3460122699386503, "grad_norm": 0.1706913709640503, "learning_rate": 4.990074087790465e-05, "loss": 1.0094234943389893, "step": 1097 }, { "epoch": 1.347239263803681, "grad_norm": 0.16733422875404358, "learning_rate": 4.990029355900841e-05, "loss": 0.938036322593689, "step": 1098 }, { "epoch": 1.3484662576687116, "grad_norm": 0.19196778535842896, "learning_rate": 4.989984523645236e-05, "loss": 0.7951363325119019, "step": 1099 }, { "epoch": 1.3496932515337423, "grad_norm": 0.20872828364372253, "learning_rate": 4.9899395910254556e-05, "loss": 0.9621920585632324, "step": 1100 }, { "epoch": 1.350920245398773, "grad_norm": 0.1800566017627716, "learning_rate": 4.989894558043312e-05, "loss": 0.9680010080337524, "step": 1101 }, { "epoch": 1.3521472392638036, "grad_norm": 0.20108072459697723, "learning_rate": 4.9898494247006205e-05, "loss": 0.9006564617156982, "step": 1102 }, { "epoch": 1.3533742331288343, "grad_norm": 0.16715992987155914, "learning_rate": 4.9898041909992e-05, "loss": 0.8654859066009521, "step": 1103 }, { "epoch": 1.354601226993865, "grad_norm": 0.2020353525876999, "learning_rate": 4.989758856940874e-05, "loss": 0.9326794147491455, "step": 1104 }, { "epoch": 1.3558282208588956, "grad_norm": 0.1819167286157608, "learning_rate": 4.989713422527469e-05, "loss": 0.8781353831291199, "step": 1105 }, { "epoch": 1.3570552147239263, "grad_norm": 0.21889522671699524, "learning_rate": 4.989667887760816e-05, "loss": 0.9967849254608154, "step": 1106 }, { "epoch": 1.358282208588957, "grad_norm": 0.175958514213562, "learning_rate": 4.9896222526427526e-05, "loss": 0.9253915548324585, "step": 1107 }, { "epoch": 1.3595092024539877, "grad_norm": 0.17569854855537415, "learning_rate": 4.9895765171751165e-05, "loss": 0.9265354871749878, "step": 1108 }, { "epoch": 1.3607361963190185, "grad_norm": 0.1798601597547531, "learning_rate": 4.989530681359751e-05, "loss": 1.072352647781372, "step": 1109 }, { "epoch": 1.3619631901840492, "grad_norm": 0.18685944378376007, "learning_rate": 4.9894847451985045e-05, "loss": 0.9472787976264954, "step": 1110 }, { "epoch": 1.3631901840490799, "grad_norm": 0.1633690744638443, "learning_rate": 4.9894387086932285e-05, "loss": 0.9938599467277527, "step": 1111 }, { "epoch": 1.3644171779141105, "grad_norm": 0.16508924961090088, "learning_rate": 4.989392571845777e-05, "loss": 0.9589614868164062, "step": 1112 }, { "epoch": 1.3656441717791412, "grad_norm": 0.17537103593349457, "learning_rate": 4.989346334658012e-05, "loss": 0.9295506477355957, "step": 1113 }, { "epoch": 1.3668711656441719, "grad_norm": 0.1886804848909378, "learning_rate": 4.989299997131796e-05, "loss": 0.8797487020492554, "step": 1114 }, { "epoch": 1.3680981595092025, "grad_norm": 0.23160924017429352, "learning_rate": 4.9892535592689966e-05, "loss": 0.7751191854476929, "step": 1115 }, { "epoch": 1.3693251533742332, "grad_norm": 0.21519026160240173, "learning_rate": 4.9892070210714856e-05, "loss": 0.8329159021377563, "step": 1116 }, { "epoch": 1.3705521472392639, "grad_norm": 0.20729094743728638, "learning_rate": 4.989160382541139e-05, "loss": 0.7989648580551147, "step": 1117 }, { "epoch": 1.3717791411042946, "grad_norm": 0.17955724895000458, "learning_rate": 4.989113643679837e-05, "loss": 0.7964174747467041, "step": 1118 }, { "epoch": 1.3730061349693252, "grad_norm": 0.19118526577949524, "learning_rate": 4.9890668044894636e-05, "loss": 1.0576101541519165, "step": 1119 }, { "epoch": 1.3742331288343559, "grad_norm": 0.20061641931533813, "learning_rate": 4.9890198649719055e-05, "loss": 0.8343989849090576, "step": 1120 }, { "epoch": 1.3754601226993866, "grad_norm": 0.15578889846801758, "learning_rate": 4.988972825129056e-05, "loss": 1.0232821702957153, "step": 1121 }, { "epoch": 1.3766871165644172, "grad_norm": 0.17875559628009796, "learning_rate": 4.988925684962811e-05, "loss": 0.9729039072990417, "step": 1122 }, { "epoch": 1.377914110429448, "grad_norm": 0.18860922753810883, "learning_rate": 4.9888784444750694e-05, "loss": 0.9396408796310425, "step": 1123 }, { "epoch": 1.3791411042944786, "grad_norm": 0.21458101272583008, "learning_rate": 4.988831103667737e-05, "loss": 0.9539918899536133, "step": 1124 }, { "epoch": 1.3803680981595092, "grad_norm": 0.2138974815607071, "learning_rate": 4.9887836625427206e-05, "loss": 0.8933693766593933, "step": 1125 }, { "epoch": 1.38159509202454, "grad_norm": 0.18779593706130981, "learning_rate": 4.988736121101933e-05, "loss": 0.8231431245803833, "step": 1126 }, { "epoch": 1.3828220858895706, "grad_norm": 0.16562288999557495, "learning_rate": 4.988688479347291e-05, "loss": 1.0576367378234863, "step": 1127 }, { "epoch": 1.3840490797546012, "grad_norm": 0.18650420010089874, "learning_rate": 4.988640737280714e-05, "loss": 0.9392659664154053, "step": 1128 }, { "epoch": 1.385276073619632, "grad_norm": 0.19175443053245544, "learning_rate": 4.9885928949041274e-05, "loss": 0.8749042749404907, "step": 1129 }, { "epoch": 1.3865030674846626, "grad_norm": 0.18337461352348328, "learning_rate": 4.9885449522194574e-05, "loss": 0.9378517270088196, "step": 1130 }, { "epoch": 1.3877300613496932, "grad_norm": 0.2096027433872223, "learning_rate": 4.988496909228639e-05, "loss": 0.8104312419891357, "step": 1131 }, { "epoch": 1.388957055214724, "grad_norm": 0.23420222103595734, "learning_rate": 4.9884487659336075e-05, "loss": 0.8442990779876709, "step": 1132 }, { "epoch": 1.3901840490797546, "grad_norm": 0.16162247955799103, "learning_rate": 4.988400522336304e-05, "loss": 0.9406664371490479, "step": 1133 }, { "epoch": 1.3914110429447852, "grad_norm": 0.2085794359445572, "learning_rate": 4.988352178438672e-05, "loss": 0.7633527517318726, "step": 1134 }, { "epoch": 1.392638036809816, "grad_norm": 0.16430480778217316, "learning_rate": 4.9883037342426606e-05, "loss": 0.8538426160812378, "step": 1135 }, { "epoch": 1.3938650306748466, "grad_norm": 0.21532666683197021, "learning_rate": 4.988255189750223e-05, "loss": 0.922752857208252, "step": 1136 }, { "epoch": 1.3950920245398772, "grad_norm": 0.1723543405532837, "learning_rate": 4.988206544963314e-05, "loss": 0.943209707736969, "step": 1137 }, { "epoch": 1.396319018404908, "grad_norm": 0.1459054797887802, "learning_rate": 4.9881577998838975e-05, "loss": 0.8612825870513916, "step": 1138 }, { "epoch": 1.3975460122699386, "grad_norm": 0.18873783946037292, "learning_rate": 4.988108954513936e-05, "loss": 0.8804869651794434, "step": 1139 }, { "epoch": 1.3987730061349692, "grad_norm": 0.21498709917068481, "learning_rate": 4.9880600088553986e-05, "loss": 0.8805394172668457, "step": 1140 }, { "epoch": 1.4, "grad_norm": 0.19018438458442688, "learning_rate": 4.9880109629102586e-05, "loss": 0.9639159440994263, "step": 1141 }, { "epoch": 1.4012269938650306, "grad_norm": 0.22569842636585236, "learning_rate": 4.987961816680492e-05, "loss": 0.9536948204040527, "step": 1142 }, { "epoch": 1.4024539877300612, "grad_norm": 0.15461353957653046, "learning_rate": 4.9879125701680816e-05, "loss": 0.9603186845779419, "step": 1143 }, { "epoch": 1.403680981595092, "grad_norm": 0.18943147361278534, "learning_rate": 4.9878632233750114e-05, "loss": 0.9638735055923462, "step": 1144 }, { "epoch": 1.4049079754601226, "grad_norm": 0.19670753180980682, "learning_rate": 4.987813776303269e-05, "loss": 0.8466602563858032, "step": 1145 }, { "epoch": 1.4061349693251532, "grad_norm": 0.26385563611984253, "learning_rate": 4.9877642289548496e-05, "loss": 0.8525388836860657, "step": 1146 }, { "epoch": 1.407361963190184, "grad_norm": 0.19693702459335327, "learning_rate": 4.9877145813317486e-05, "loss": 0.8487154841423035, "step": 1147 }, { "epoch": 1.4085889570552146, "grad_norm": 0.1904749721288681, "learning_rate": 4.987664833435969e-05, "loss": 0.9247676134109497, "step": 1148 }, { "epoch": 1.4098159509202455, "grad_norm": 0.18672843277454376, "learning_rate": 4.987614985269515e-05, "loss": 1.0691659450531006, "step": 1149 }, { "epoch": 1.4110429447852761, "grad_norm": 0.20420436561107635, "learning_rate": 4.987565036834395e-05, "loss": 0.8155128955841064, "step": 1150 }, { "epoch": 1.4122699386503068, "grad_norm": 0.20449402928352356, "learning_rate": 4.987514988132624e-05, "loss": 0.9052773118019104, "step": 1151 }, { "epoch": 1.4134969325153375, "grad_norm": 0.17604197561740875, "learning_rate": 4.987464839166218e-05, "loss": 0.9967154264450073, "step": 1152 }, { "epoch": 1.4147239263803681, "grad_norm": 0.1744452863931656, "learning_rate": 4.987414589937199e-05, "loss": 0.9157355427742004, "step": 1153 }, { "epoch": 1.4159509202453988, "grad_norm": 0.18961799144744873, "learning_rate": 4.9873642404475916e-05, "loss": 0.8274749517440796, "step": 1154 }, { "epoch": 1.4171779141104295, "grad_norm": 0.20351406931877136, "learning_rate": 4.9873137906994264e-05, "loss": 0.9179465174674988, "step": 1155 }, { "epoch": 1.4184049079754601, "grad_norm": 0.19829154014587402, "learning_rate": 4.987263240694736e-05, "loss": 0.8294053077697754, "step": 1156 }, { "epoch": 1.4196319018404908, "grad_norm": 0.1980678290128708, "learning_rate": 4.987212590435559e-05, "loss": 0.9253721833229065, "step": 1157 }, { "epoch": 1.4208588957055215, "grad_norm": 0.1794218122959137, "learning_rate": 4.987161839923936e-05, "loss": 0.9924664497375488, "step": 1158 }, { "epoch": 1.4220858895705522, "grad_norm": 0.15240323543548584, "learning_rate": 4.9871109891619124e-05, "loss": 0.9590776562690735, "step": 1159 }, { "epoch": 1.4233128834355828, "grad_norm": 0.18528641760349274, "learning_rate": 4.9870600381515386e-05, "loss": 1.0283924341201782, "step": 1160 }, { "epoch": 1.4245398773006135, "grad_norm": 0.15886536240577698, "learning_rate": 4.987008986894868e-05, "loss": 0.8749122619628906, "step": 1161 }, { "epoch": 1.4257668711656442, "grad_norm": 0.18502865731716156, "learning_rate": 4.9869578353939575e-05, "loss": 0.9972305297851562, "step": 1162 }, { "epoch": 1.4269938650306748, "grad_norm": 0.18499596416950226, "learning_rate": 4.986906583650871e-05, "loss": 0.9044044017791748, "step": 1163 }, { "epoch": 1.4282208588957055, "grad_norm": 0.2089417278766632, "learning_rate": 4.986855231667672e-05, "loss": 0.9416965842247009, "step": 1164 }, { "epoch": 1.4294478527607362, "grad_norm": 0.1896720826625824, "learning_rate": 4.986803779446432e-05, "loss": 0.8748823404312134, "step": 1165 }, { "epoch": 1.4306748466257668, "grad_norm": 0.18214109539985657, "learning_rate": 4.986752226989224e-05, "loss": 0.9979968667030334, "step": 1166 }, { "epoch": 1.4319018404907975, "grad_norm": 0.1989288628101349, "learning_rate": 4.9867005742981254e-05, "loss": 1.0325764417648315, "step": 1167 }, { "epoch": 1.4331288343558282, "grad_norm": 0.1887117177248001, "learning_rate": 4.98664882137522e-05, "loss": 0.9078900218009949, "step": 1168 }, { "epoch": 1.4343558282208588, "grad_norm": 0.19429640471935272, "learning_rate": 4.9865969682225925e-05, "loss": 0.7989035844802856, "step": 1169 }, { "epoch": 1.4355828220858895, "grad_norm": 0.18662074208259583, "learning_rate": 4.986545014842333e-05, "loss": 0.9838145971298218, "step": 1170 }, { "epoch": 1.4368098159509202, "grad_norm": 0.20356538891792297, "learning_rate": 4.9864929612365366e-05, "loss": 1.001339316368103, "step": 1171 }, { "epoch": 1.438036809815951, "grad_norm": 0.2055252492427826, "learning_rate": 4.9864408074072996e-05, "loss": 0.8797118663787842, "step": 1172 }, { "epoch": 1.4392638036809817, "grad_norm": 0.17059247195720673, "learning_rate": 4.986388553356726e-05, "loss": 1.0193331241607666, "step": 1173 }, { "epoch": 1.4404907975460124, "grad_norm": 0.18234743177890778, "learning_rate": 4.98633619908692e-05, "loss": 0.9490376710891724, "step": 1174 }, { "epoch": 1.441717791411043, "grad_norm": 0.167744979262352, "learning_rate": 4.986283744599994e-05, "loss": 0.9579806327819824, "step": 1175 }, { "epoch": 1.4429447852760737, "grad_norm": 0.1863568127155304, "learning_rate": 4.9862311898980606e-05, "loss": 0.9314627647399902, "step": 1176 }, { "epoch": 1.4441717791411044, "grad_norm": 0.24161089956760406, "learning_rate": 4.98617853498324e-05, "loss": 0.7748671770095825, "step": 1177 }, { "epoch": 1.445398773006135, "grad_norm": 0.18998147547245026, "learning_rate": 4.986125779857653e-05, "loss": 0.9568166732788086, "step": 1178 }, { "epoch": 1.4466257668711657, "grad_norm": 0.1730678677558899, "learning_rate": 4.986072924523426e-05, "loss": 1.0047098398208618, "step": 1179 }, { "epoch": 1.4478527607361964, "grad_norm": 0.20666338503360748, "learning_rate": 4.98601996898269e-05, "loss": 0.9095351696014404, "step": 1180 }, { "epoch": 1.449079754601227, "grad_norm": 0.18213781714439392, "learning_rate": 4.98596691323758e-05, "loss": 0.8062865734100342, "step": 1181 }, { "epoch": 1.4503067484662577, "grad_norm": 0.18743200600147247, "learning_rate": 4.9859137572902334e-05, "loss": 0.9393439292907715, "step": 1182 }, { "epoch": 1.4515337423312884, "grad_norm": 0.19392849504947662, "learning_rate": 4.9858605011427936e-05, "loss": 0.8287757635116577, "step": 1183 }, { "epoch": 1.452760736196319, "grad_norm": 0.18488723039627075, "learning_rate": 4.985807144797407e-05, "loss": 0.871588945388794, "step": 1184 }, { "epoch": 1.4539877300613497, "grad_norm": 0.1851603090763092, "learning_rate": 4.985753688256224e-05, "loss": 0.8319457769393921, "step": 1185 }, { "epoch": 1.4552147239263804, "grad_norm": 0.17359931766986847, "learning_rate": 4.9857001315214e-05, "loss": 0.9853024482727051, "step": 1186 }, { "epoch": 1.456441717791411, "grad_norm": 0.20796433091163635, "learning_rate": 4.985646474595092e-05, "loss": 0.9299944639205933, "step": 1187 }, { "epoch": 1.4576687116564417, "grad_norm": 0.17456236481666565, "learning_rate": 4.9855927174794646e-05, "loss": 1.0164738893508911, "step": 1188 }, { "epoch": 1.4588957055214724, "grad_norm": 0.16242170333862305, "learning_rate": 4.985538860176684e-05, "loss": 0.8822245597839355, "step": 1189 }, { "epoch": 1.460122699386503, "grad_norm": 0.20580436289310455, "learning_rate": 4.9854849026889206e-05, "loss": 0.8829412460327148, "step": 1190 }, { "epoch": 1.4613496932515337, "grad_norm": 0.1944718062877655, "learning_rate": 4.985430845018349e-05, "loss": 0.8640929460525513, "step": 1191 }, { "epoch": 1.4625766871165644, "grad_norm": 0.15854008495807648, "learning_rate": 4.9853766871671496e-05, "loss": 0.8969576358795166, "step": 1192 }, { "epoch": 1.463803680981595, "grad_norm": 0.18089702725410461, "learning_rate": 4.985322429137504e-05, "loss": 0.9456841945648193, "step": 1193 }, { "epoch": 1.4650306748466257, "grad_norm": 0.17778410017490387, "learning_rate": 4.9852680709316005e-05, "loss": 0.9935488700866699, "step": 1194 }, { "epoch": 1.4662576687116564, "grad_norm": 0.2204640656709671, "learning_rate": 4.985213612551628e-05, "loss": 0.8119841814041138, "step": 1195 }, { "epoch": 1.467484662576687, "grad_norm": 0.1953750103712082, "learning_rate": 4.985159053999784e-05, "loss": 1.037489891052246, "step": 1196 }, { "epoch": 1.4687116564417177, "grad_norm": 0.21036842465400696, "learning_rate": 4.985104395278266e-05, "loss": 0.8646558523178101, "step": 1197 }, { "epoch": 1.4699386503067484, "grad_norm": 0.21057817339897156, "learning_rate": 4.985049636389277e-05, "loss": 0.920552134513855, "step": 1198 }, { "epoch": 1.471165644171779, "grad_norm": 0.2018350511789322, "learning_rate": 4.984994777335025e-05, "loss": 0.7423895597457886, "step": 1199 }, { "epoch": 1.4723926380368098, "grad_norm": 0.1692741960287094, "learning_rate": 4.984939818117721e-05, "loss": 0.9976302981376648, "step": 1200 }, { "epoch": 1.4736196319018404, "grad_norm": 0.19404858350753784, "learning_rate": 4.9848847587395805e-05, "loss": 0.8115323781967163, "step": 1201 }, { "epoch": 1.474846625766871, "grad_norm": 0.19729742407798767, "learning_rate": 4.9848295992028225e-05, "loss": 1.026200294494629, "step": 1202 }, { "epoch": 1.4760736196319018, "grad_norm": 0.17253372073173523, "learning_rate": 4.98477433950967e-05, "loss": 0.9727370738983154, "step": 1203 }, { "epoch": 1.4773006134969324, "grad_norm": 0.18772660195827484, "learning_rate": 4.9847189796623505e-05, "loss": 0.9156450629234314, "step": 1204 }, { "epoch": 1.478527607361963, "grad_norm": 0.19318990409374237, "learning_rate": 4.984663519663097e-05, "loss": 0.9632110595703125, "step": 1205 }, { "epoch": 1.4797546012269938, "grad_norm": 0.1725410670042038, "learning_rate": 4.9846079595141415e-05, "loss": 0.9008842706680298, "step": 1206 }, { "epoch": 1.4809815950920244, "grad_norm": 0.16491542756557465, "learning_rate": 4.984552299217726e-05, "loss": 0.9349021315574646, "step": 1207 }, { "epoch": 1.482208588957055, "grad_norm": 0.19986127316951752, "learning_rate": 4.984496538776094e-05, "loss": 0.8801077008247375, "step": 1208 }, { "epoch": 1.4834355828220858, "grad_norm": 0.17559842765331268, "learning_rate": 4.984440678191493e-05, "loss": 0.826363205909729, "step": 1209 }, { "epoch": 1.4846625766871164, "grad_norm": 0.1942412406206131, "learning_rate": 4.984384717466173e-05, "loss": 0.9567005634307861, "step": 1210 }, { "epoch": 1.4858895705521473, "grad_norm": 0.19127009809017181, "learning_rate": 4.984328656602391e-05, "loss": 0.8822938203811646, "step": 1211 }, { "epoch": 1.487116564417178, "grad_norm": 0.193020761013031, "learning_rate": 4.984272495602407e-05, "loss": 0.9850732088088989, "step": 1212 }, { "epoch": 1.4883435582822087, "grad_norm": 0.19074974954128265, "learning_rate": 4.9842162344684836e-05, "loss": 0.8629568815231323, "step": 1213 }, { "epoch": 1.4895705521472393, "grad_norm": 0.23656028509140015, "learning_rate": 4.984159873202889e-05, "loss": 0.9051141142845154, "step": 1214 }, { "epoch": 1.49079754601227, "grad_norm": 0.16770417988300323, "learning_rate": 4.9841034118078947e-05, "loss": 0.9475696682929993, "step": 1215 }, { "epoch": 1.4920245398773007, "grad_norm": 0.18268711864948273, "learning_rate": 4.9840468502857776e-05, "loss": 0.8892676830291748, "step": 1216 }, { "epoch": 1.4932515337423313, "grad_norm": 0.20808525383472443, "learning_rate": 4.983990188638816e-05, "loss": 0.8560172319412231, "step": 1217 }, { "epoch": 1.494478527607362, "grad_norm": 0.1992543339729309, "learning_rate": 4.983933426869295e-05, "loss": 0.7798845171928406, "step": 1218 }, { "epoch": 1.4957055214723927, "grad_norm": 0.23684385418891907, "learning_rate": 4.9838765649795e-05, "loss": 0.7910186052322388, "step": 1219 }, { "epoch": 1.4969325153374233, "grad_norm": 0.16673968732357025, "learning_rate": 4.983819602971728e-05, "loss": 0.9263080358505249, "step": 1220 }, { "epoch": 1.498159509202454, "grad_norm": 0.16343262791633606, "learning_rate": 4.9837625408482696e-05, "loss": 1.0609714984893799, "step": 1221 }, { "epoch": 1.4993865030674847, "grad_norm": 0.19870206713676453, "learning_rate": 4.983705378611428e-05, "loss": 0.8523554801940918, "step": 1222 }, { "epoch": 1.5006134969325153, "grad_norm": 0.1983785778284073, "learning_rate": 4.983648116263506e-05, "loss": 0.9084864258766174, "step": 1223 }, { "epoch": 1.501840490797546, "grad_norm": 0.21824076771736145, "learning_rate": 4.983590753806812e-05, "loss": 0.8987696766853333, "step": 1224 }, { "epoch": 1.5030674846625767, "grad_norm": 0.1837080866098404, "learning_rate": 4.9835332912436584e-05, "loss": 0.8256120085716248, "step": 1225 }, { "epoch": 1.5042944785276073, "grad_norm": 0.17411094903945923, "learning_rate": 4.983475728576361e-05, "loss": 0.9645184874534607, "step": 1226 }, { "epoch": 1.505521472392638, "grad_norm": 0.2136937379837036, "learning_rate": 4.98341806580724e-05, "loss": 0.9659698009490967, "step": 1227 }, { "epoch": 1.5067484662576687, "grad_norm": 0.1978635936975479, "learning_rate": 4.98336030293862e-05, "loss": 0.7184720039367676, "step": 1228 }, { "epoch": 1.5079754601226993, "grad_norm": 0.18232986330986023, "learning_rate": 4.983302439972829e-05, "loss": 0.8214398622512817, "step": 1229 }, { "epoch": 1.50920245398773, "grad_norm": 0.21803617477416992, "learning_rate": 4.983244476912199e-05, "loss": 0.8293343782424927, "step": 1230 }, { "epoch": 1.510429447852761, "grad_norm": 0.16214418411254883, "learning_rate": 4.9831864137590664e-05, "loss": 0.7952291965484619, "step": 1231 }, { "epoch": 1.5116564417177916, "grad_norm": 0.17155008018016815, "learning_rate": 4.983128250515772e-05, "loss": 0.8816521167755127, "step": 1232 }, { "epoch": 1.5128834355828222, "grad_norm": 0.18034426867961884, "learning_rate": 4.9830699871846596e-05, "loss": 0.9045590162277222, "step": 1233 }, { "epoch": 1.514110429447853, "grad_norm": 0.1783251315355301, "learning_rate": 4.983011623768078e-05, "loss": 1.0336397886276245, "step": 1234 }, { "epoch": 1.5153374233128836, "grad_norm": 0.1640608161687851, "learning_rate": 4.9829531602683796e-05, "loss": 0.8256760835647583, "step": 1235 }, { "epoch": 1.5165644171779142, "grad_norm": 0.19238907098770142, "learning_rate": 4.9828945966879215e-05, "loss": 0.8059515953063965, "step": 1236 }, { "epoch": 1.517791411042945, "grad_norm": 0.19588683545589447, "learning_rate": 4.982835933029063e-05, "loss": 0.7083983421325684, "step": 1237 }, { "epoch": 1.5190184049079756, "grad_norm": 0.1832931637763977, "learning_rate": 4.9827771692941694e-05, "loss": 0.9435744285583496, "step": 1238 }, { "epoch": 1.5202453987730062, "grad_norm": 0.2113242894411087, "learning_rate": 4.98271830548561e-05, "loss": 0.8815423250198364, "step": 1239 }, { "epoch": 1.521472392638037, "grad_norm": 0.180886372923851, "learning_rate": 4.982659341605755e-05, "loss": 0.8833354711532593, "step": 1240 }, { "epoch": 1.5226993865030676, "grad_norm": 0.21186549961566925, "learning_rate": 4.982600277656985e-05, "loss": 0.8947727680206299, "step": 1241 }, { "epoch": 1.5239263803680982, "grad_norm": 0.183975487947464, "learning_rate": 4.9825411136416766e-05, "loss": 1.0511505603790283, "step": 1242 }, { "epoch": 1.525153374233129, "grad_norm": 0.16139742732048035, "learning_rate": 4.9824818495622166e-05, "loss": 0.8933581113815308, "step": 1243 }, { "epoch": 1.5263803680981596, "grad_norm": 0.19275887310504913, "learning_rate": 4.982422485420994e-05, "loss": 0.7642386555671692, "step": 1244 }, { "epoch": 1.5276073619631902, "grad_norm": 0.23395943641662598, "learning_rate": 4.982363021220401e-05, "loss": 0.7444738745689392, "step": 1245 }, { "epoch": 1.528834355828221, "grad_norm": 0.19990074634552002, "learning_rate": 4.982303456962834e-05, "loss": 0.9945081472396851, "step": 1246 }, { "epoch": 1.5300613496932516, "grad_norm": 0.17600737512111664, "learning_rate": 4.9822437926506946e-05, "loss": 1.0373889207839966, "step": 1247 }, { "epoch": 1.5312883435582823, "grad_norm": 0.17150676250457764, "learning_rate": 4.982184028286388e-05, "loss": 0.7713488936424255, "step": 1248 }, { "epoch": 1.532515337423313, "grad_norm": 0.17861782014369965, "learning_rate": 4.982124163872323e-05, "loss": 0.9978914260864258, "step": 1249 }, { "epoch": 1.5337423312883436, "grad_norm": 0.19749179482460022, "learning_rate": 4.9820641994109106e-05, "loss": 0.9543532729148865, "step": 1250 }, { "epoch": 1.5349693251533743, "grad_norm": 0.2188674807548523, "learning_rate": 4.98200413490457e-05, "loss": 0.90189528465271, "step": 1251 }, { "epoch": 1.536196319018405, "grad_norm": 0.24461953341960907, "learning_rate": 4.981943970355723e-05, "loss": 0.8011220097541809, "step": 1252 }, { "epoch": 1.5374233128834356, "grad_norm": 0.18217580020427704, "learning_rate": 4.981883705766791e-05, "loss": 1.0349534749984741, "step": 1253 }, { "epoch": 1.5386503067484663, "grad_norm": 0.1659940779209137, "learning_rate": 4.981823341140207e-05, "loss": 1.0626699924468994, "step": 1254 }, { "epoch": 1.539877300613497, "grad_norm": 0.19969500601291656, "learning_rate": 4.981762876478402e-05, "loss": 0.8893184661865234, "step": 1255 }, { "epoch": 1.5411042944785276, "grad_norm": 0.18401555716991425, "learning_rate": 4.981702311783814e-05, "loss": 0.9761160612106323, "step": 1256 }, { "epoch": 1.5423312883435583, "grad_norm": 0.1726689487695694, "learning_rate": 4.9816416470588836e-05, "loss": 0.8029924631118774, "step": 1257 }, { "epoch": 1.543558282208589, "grad_norm": 0.17055000364780426, "learning_rate": 4.981580882306056e-05, "loss": 0.9978471994400024, "step": 1258 }, { "epoch": 1.5447852760736196, "grad_norm": 0.1803646832704544, "learning_rate": 4.981520017527781e-05, "loss": 0.7489546537399292, "step": 1259 }, { "epoch": 1.5460122699386503, "grad_norm": 0.15044789016246796, "learning_rate": 4.9814590527265116e-05, "loss": 0.9161357879638672, "step": 1260 }, { "epoch": 1.547239263803681, "grad_norm": 0.2023218274116516, "learning_rate": 4.9813979879047056e-05, "loss": 1.0035831928253174, "step": 1261 }, { "epoch": 1.5484662576687116, "grad_norm": 0.16251428425312042, "learning_rate": 4.981336823064823e-05, "loss": 0.9169081449508667, "step": 1262 }, { "epoch": 1.5496932515337423, "grad_norm": 0.21290335059165955, "learning_rate": 4.981275558209331e-05, "loss": 0.9395977854728699, "step": 1263 }, { "epoch": 1.550920245398773, "grad_norm": 0.20758406817913055, "learning_rate": 4.981214193340697e-05, "loss": 0.8612538576126099, "step": 1264 }, { "epoch": 1.5521472392638036, "grad_norm": 0.1474982351064682, "learning_rate": 4.981152728461396e-05, "loss": 0.7738584280014038, "step": 1265 }, { "epoch": 1.5533742331288343, "grad_norm": 0.17471396923065186, "learning_rate": 4.981091163573905e-05, "loss": 0.933851957321167, "step": 1266 }, { "epoch": 1.554601226993865, "grad_norm": 0.19046546518802643, "learning_rate": 4.981029498680706e-05, "loss": 0.9434802532196045, "step": 1267 }, { "epoch": 1.5558282208588956, "grad_norm": 0.15216605365276337, "learning_rate": 4.9809677337842834e-05, "loss": 0.8368450999259949, "step": 1268 }, { "epoch": 1.5570552147239263, "grad_norm": 0.1871284395456314, "learning_rate": 4.980905868887127e-05, "loss": 0.9397052526473999, "step": 1269 }, { "epoch": 1.558282208588957, "grad_norm": 0.18366259336471558, "learning_rate": 4.980843903991732e-05, "loss": 0.8558623194694519, "step": 1270 }, { "epoch": 1.5595092024539876, "grad_norm": 0.16747161746025085, "learning_rate": 4.9807818391005937e-05, "loss": 0.9162375926971436, "step": 1271 }, { "epoch": 1.5607361963190183, "grad_norm": 0.15392498672008514, "learning_rate": 4.980719674216215e-05, "loss": 0.9587031006813049, "step": 1272 }, { "epoch": 1.561963190184049, "grad_norm": 0.18987806141376495, "learning_rate": 4.980657409341102e-05, "loss": 0.9112783670425415, "step": 1273 }, { "epoch": 1.5631901840490796, "grad_norm": 0.16945816576480865, "learning_rate": 4.980595044477764e-05, "loss": 0.9814726114273071, "step": 1274 }, { "epoch": 1.5644171779141103, "grad_norm": 0.19355462491512299, "learning_rate": 4.980532579628714e-05, "loss": 0.9928807020187378, "step": 1275 }, { "epoch": 1.565644171779141, "grad_norm": 0.18800993263721466, "learning_rate": 4.980470014796471e-05, "loss": 0.9072080254554749, "step": 1276 }, { "epoch": 1.5668711656441716, "grad_norm": 0.19659541547298431, "learning_rate": 4.980407349983556e-05, "loss": 0.9037590622901917, "step": 1277 }, { "epoch": 1.5680981595092025, "grad_norm": 0.1918146312236786, "learning_rate": 4.980344585192494e-05, "loss": 0.9030840992927551, "step": 1278 }, { "epoch": 1.5693251533742332, "grad_norm": 0.19144141674041748, "learning_rate": 4.980281720425818e-05, "loss": 0.8143747448921204, "step": 1279 }, { "epoch": 1.5705521472392638, "grad_norm": 0.27582794427871704, "learning_rate": 4.980218755686058e-05, "loss": 0.9714144468307495, "step": 1280 }, { "epoch": 1.5717791411042945, "grad_norm": 0.17736248672008514, "learning_rate": 4.9801556909757554e-05, "loss": 1.0316137075424194, "step": 1281 }, { "epoch": 1.5730061349693252, "grad_norm": 0.18420854210853577, "learning_rate": 4.98009252629745e-05, "loss": 1.0664772987365723, "step": 1282 }, { "epoch": 1.5742331288343558, "grad_norm": 0.1928582489490509, "learning_rate": 4.980029261653687e-05, "loss": 1.042616605758667, "step": 1283 }, { "epoch": 1.5754601226993865, "grad_norm": 0.17965300381183624, "learning_rate": 4.9799658970470187e-05, "loss": 0.9353423118591309, "step": 1284 }, { "epoch": 1.5766871165644172, "grad_norm": 0.15357518196105957, "learning_rate": 4.979902432479998e-05, "loss": 0.9782334566116333, "step": 1285 }, { "epoch": 1.5779141104294478, "grad_norm": 0.16921602189540863, "learning_rate": 4.9798388679551834e-05, "loss": 0.8942508101463318, "step": 1286 }, { "epoch": 1.5791411042944785, "grad_norm": 0.18951749801635742, "learning_rate": 4.9797752034751365e-05, "loss": 0.8504163026809692, "step": 1287 }, { "epoch": 1.5803680981595092, "grad_norm": 0.19246496260166168, "learning_rate": 4.979711439042424e-05, "loss": 0.8021432161331177, "step": 1288 }, { "epoch": 1.5815950920245399, "grad_norm": 0.20584334433078766, "learning_rate": 4.979647574659616e-05, "loss": 0.734963059425354, "step": 1289 }, { "epoch": 1.5828220858895705, "grad_norm": 0.20215895771980286, "learning_rate": 4.9795836103292856e-05, "loss": 0.8372102975845337, "step": 1290 }, { "epoch": 1.5840490797546012, "grad_norm": 0.16222013533115387, "learning_rate": 4.979519546054012e-05, "loss": 0.9538466930389404, "step": 1291 }, { "epoch": 1.5852760736196319, "grad_norm": 0.20427826046943665, "learning_rate": 4.979455381836378e-05, "loss": 0.9116246104240417, "step": 1292 }, { "epoch": 1.5865030674846625, "grad_norm": 0.16992385685443878, "learning_rate": 4.979391117678969e-05, "loss": 0.8737009763717651, "step": 1293 }, { "epoch": 1.5877300613496934, "grad_norm": 0.2065805047750473, "learning_rate": 4.9793267535843755e-05, "loss": 1.0102182626724243, "step": 1294 }, { "epoch": 1.588957055214724, "grad_norm": 0.1723962426185608, "learning_rate": 4.979262289555192e-05, "loss": 0.9132972955703735, "step": 1295 }, { "epoch": 1.5901840490797547, "grad_norm": 0.196980282664299, "learning_rate": 4.979197725594016e-05, "loss": 1.0324225425720215, "step": 1296 }, { "epoch": 1.5914110429447854, "grad_norm": 0.19559642672538757, "learning_rate": 4.979133061703451e-05, "loss": 0.8566784858703613, "step": 1297 }, { "epoch": 1.592638036809816, "grad_norm": 0.1606207937002182, "learning_rate": 4.979068297886102e-05, "loss": 0.9399926066398621, "step": 1298 }, { "epoch": 1.5938650306748468, "grad_norm": 0.1702912300825119, "learning_rate": 4.979003434144582e-05, "loss": 0.9267230033874512, "step": 1299 }, { "epoch": 1.5950920245398774, "grad_norm": 0.19440452754497528, "learning_rate": 4.9789384704815036e-05, "loss": 0.9391424655914307, "step": 1300 }, { "epoch": 1.596319018404908, "grad_norm": 0.17970800399780273, "learning_rate": 4.9788734068994845e-05, "loss": 0.9678232669830322, "step": 1301 }, { "epoch": 1.5975460122699388, "grad_norm": 0.16566388309001923, "learning_rate": 4.97880824340115e-05, "loss": 0.9349303245544434, "step": 1302 }, { "epoch": 1.5987730061349694, "grad_norm": 0.1999782919883728, "learning_rate": 4.978742979989123e-05, "loss": 0.9509814977645874, "step": 1303 }, { "epoch": 1.6, "grad_norm": 0.2471192330121994, "learning_rate": 4.978677616666038e-05, "loss": 0.7704371213912964, "step": 1304 }, { "epoch": 1.6012269938650308, "grad_norm": 0.16735784709453583, "learning_rate": 4.9786121534345265e-05, "loss": 1.0377693176269531, "step": 1305 }, { "epoch": 1.6024539877300614, "grad_norm": 0.15133002400398254, "learning_rate": 4.978546590297228e-05, "loss": 0.9787486791610718, "step": 1306 }, { "epoch": 1.603680981595092, "grad_norm": 0.1519252210855484, "learning_rate": 4.9784809272567867e-05, "loss": 0.9813492894172668, "step": 1307 }, { "epoch": 1.6049079754601228, "grad_norm": 0.16629168391227722, "learning_rate": 4.978415164315847e-05, "loss": 0.9434167146682739, "step": 1308 }, { "epoch": 1.6061349693251534, "grad_norm": 0.17980220913887024, "learning_rate": 4.978349301477062e-05, "loss": 0.9208977222442627, "step": 1309 }, { "epoch": 1.607361963190184, "grad_norm": 0.21979403495788574, "learning_rate": 4.978283338743084e-05, "loss": 0.7796117067337036, "step": 1310 }, { "epoch": 1.6085889570552148, "grad_norm": 0.1709652841091156, "learning_rate": 4.978217276116573e-05, "loss": 1.0015645027160645, "step": 1311 }, { "epoch": 1.6098159509202454, "grad_norm": 0.1487095057964325, "learning_rate": 4.9781511136001915e-05, "loss": 0.7876495122909546, "step": 1312 }, { "epoch": 1.611042944785276, "grad_norm": 0.17600707709789276, "learning_rate": 4.9780848511966074e-05, "loss": 0.9326908588409424, "step": 1313 }, { "epoch": 1.6122699386503068, "grad_norm": 0.20877906680107117, "learning_rate": 4.9780184889084905e-05, "loss": 0.851376473903656, "step": 1314 }, { "epoch": 1.6134969325153374, "grad_norm": 0.24897223711013794, "learning_rate": 4.977952026738515e-05, "loss": 0.7583107948303223, "step": 1315 }, { "epoch": 1.614723926380368, "grad_norm": 0.18076905608177185, "learning_rate": 4.977885464689362e-05, "loss": 0.9750147461891174, "step": 1316 }, { "epoch": 1.6159509202453988, "grad_norm": 0.1955837607383728, "learning_rate": 4.977818802763712e-05, "loss": 0.9074500799179077, "step": 1317 }, { "epoch": 1.6171779141104294, "grad_norm": 0.20664583146572113, "learning_rate": 4.977752040964253e-05, "loss": 0.8641035556793213, "step": 1318 }, { "epoch": 1.61840490797546, "grad_norm": 0.1980990320444107, "learning_rate": 4.977685179293676e-05, "loss": 0.8859966397285461, "step": 1319 }, { "epoch": 1.6196319018404908, "grad_norm": 0.24124565720558167, "learning_rate": 4.977618217754676e-05, "loss": 0.8036589622497559, "step": 1320 }, { "epoch": 1.6208588957055214, "grad_norm": 0.20728148519992828, "learning_rate": 4.977551156349952e-05, "loss": 0.7865115404129028, "step": 1321 }, { "epoch": 1.622085889570552, "grad_norm": 0.213145449757576, "learning_rate": 4.9774839950822086e-05, "loss": 0.895906388759613, "step": 1322 }, { "epoch": 1.6233128834355828, "grad_norm": 0.24244731664657593, "learning_rate": 4.97741673395415e-05, "loss": 0.7418704032897949, "step": 1323 }, { "epoch": 1.6245398773006134, "grad_norm": 0.171245276927948, "learning_rate": 4.977349372968488e-05, "loss": 1.0677049160003662, "step": 1324 }, { "epoch": 1.6257668711656441, "grad_norm": 0.22584687173366547, "learning_rate": 4.977281912127939e-05, "loss": 0.8112649917602539, "step": 1325 }, { "epoch": 1.6269938650306748, "grad_norm": 0.20367532968521118, "learning_rate": 4.977214351435222e-05, "loss": 1.0096423625946045, "step": 1326 }, { "epoch": 1.6282208588957054, "grad_norm": 0.20453089475631714, "learning_rate": 4.9771466908930595e-05, "loss": 0.6998109817504883, "step": 1327 }, { "epoch": 1.6294478527607361, "grad_norm": 0.17817901074886322, "learning_rate": 4.977078930504179e-05, "loss": 0.788205623626709, "step": 1328 }, { "epoch": 1.6306748466257668, "grad_norm": 0.22603613138198853, "learning_rate": 4.9770110702713116e-05, "loss": 0.7386012673377991, "step": 1329 }, { "epoch": 1.6319018404907975, "grad_norm": 0.1970590054988861, "learning_rate": 4.976943110197193e-05, "loss": 0.9151065349578857, "step": 1330 }, { "epoch": 1.6331288343558281, "grad_norm": 0.19774241745471954, "learning_rate": 4.9768750502845616e-05, "loss": 0.8456887602806091, "step": 1331 }, { "epoch": 1.6343558282208588, "grad_norm": 0.20249997079372406, "learning_rate": 4.976806890536161e-05, "loss": 0.8153011202812195, "step": 1332 }, { "epoch": 1.6355828220858895, "grad_norm": 0.18277142941951752, "learning_rate": 4.976738630954739e-05, "loss": 0.84360671043396, "step": 1333 }, { "epoch": 1.6368098159509201, "grad_norm": 0.17833977937698364, "learning_rate": 4.9766702715430466e-05, "loss": 1.0284920930862427, "step": 1334 }, { "epoch": 1.6380368098159508, "grad_norm": 0.20557193458080292, "learning_rate": 4.976601812303839e-05, "loss": 0.7970531582832336, "step": 1335 }, { "epoch": 1.6392638036809815, "grad_norm": 0.21470536291599274, "learning_rate": 4.976533253239877e-05, "loss": 0.8880772590637207, "step": 1336 }, { "epoch": 1.6404907975460121, "grad_norm": 0.2033514678478241, "learning_rate": 4.976464594353921e-05, "loss": 0.9052658081054688, "step": 1337 }, { "epoch": 1.6417177914110428, "grad_norm": 0.1632552146911621, "learning_rate": 4.9763958356487414e-05, "loss": 0.8446459174156189, "step": 1338 }, { "epoch": 1.6429447852760735, "grad_norm": 0.23561400175094604, "learning_rate": 4.976326977127109e-05, "loss": 0.8300870060920715, "step": 1339 }, { "epoch": 1.6441717791411041, "grad_norm": 0.19152043759822845, "learning_rate": 4.976258018791798e-05, "loss": 0.9152284860610962, "step": 1340 }, { "epoch": 1.645398773006135, "grad_norm": 0.17993362247943878, "learning_rate": 4.976188960645589e-05, "loss": 0.9743492603302002, "step": 1341 }, { "epoch": 1.6466257668711657, "grad_norm": 0.16835243999958038, "learning_rate": 4.976119802691266e-05, "loss": 1.0597968101501465, "step": 1342 }, { "epoch": 1.6478527607361964, "grad_norm": 0.2086205929517746, "learning_rate": 4.9760505449316155e-05, "loss": 0.754048228263855, "step": 1343 }, { "epoch": 1.649079754601227, "grad_norm": 0.17771516740322113, "learning_rate": 4.9759811873694286e-05, "loss": 0.8939000368118286, "step": 1344 }, { "epoch": 1.6503067484662577, "grad_norm": 0.2232564091682434, "learning_rate": 4.975911730007502e-05, "loss": 0.7638440132141113, "step": 1345 }, { "epoch": 1.6515337423312884, "grad_norm": 0.22877466678619385, "learning_rate": 4.975842172848636e-05, "loss": 0.7275345325469971, "step": 1346 }, { "epoch": 1.652760736196319, "grad_norm": 0.19890423119068146, "learning_rate": 4.9757725158956325e-05, "loss": 0.8932344317436218, "step": 1347 }, { "epoch": 1.6539877300613497, "grad_norm": 0.1786969006061554, "learning_rate": 4.9757027591513e-05, "loss": 0.9943820834159851, "step": 1348 }, { "epoch": 1.6552147239263804, "grad_norm": 0.1609014868736267, "learning_rate": 4.975632902618451e-05, "loss": 0.8346821069717407, "step": 1349 }, { "epoch": 1.656441717791411, "grad_norm": 0.22072093188762665, "learning_rate": 4.9755629462999e-05, "loss": 0.7408947944641113, "step": 1350 }, { "epoch": 1.6576687116564417, "grad_norm": 0.18604783713817596, "learning_rate": 4.975492890198467e-05, "loss": 0.8323352932929993, "step": 1351 }, { "epoch": 1.6588957055214724, "grad_norm": 0.20381394028663635, "learning_rate": 4.975422734316976e-05, "loss": 0.8683269619941711, "step": 1352 }, { "epoch": 1.660122699386503, "grad_norm": 0.2345447838306427, "learning_rate": 4.975352478658255e-05, "loss": 0.8947796821594238, "step": 1353 }, { "epoch": 1.6613496932515337, "grad_norm": 0.19269905984401703, "learning_rate": 4.9752821232251345e-05, "loss": 1.0251814126968384, "step": 1354 }, { "epoch": 1.6625766871165644, "grad_norm": 0.19592401385307312, "learning_rate": 4.975211668020453e-05, "loss": 0.8965837955474854, "step": 1355 }, { "epoch": 1.6638036809815953, "grad_norm": 0.20245297253131866, "learning_rate": 4.975141113047047e-05, "loss": 0.9641952514648438, "step": 1356 }, { "epoch": 1.665030674846626, "grad_norm": 0.18568268418312073, "learning_rate": 4.975070458307763e-05, "loss": 1.0007257461547852, "step": 1357 }, { "epoch": 1.6662576687116566, "grad_norm": 0.1769890934228897, "learning_rate": 4.9749997038054475e-05, "loss": 0.8204682469367981, "step": 1358 }, { "epoch": 1.6674846625766873, "grad_norm": 0.16598713397979736, "learning_rate": 4.974928849542954e-05, "loss": 0.9657609462738037, "step": 1359 }, { "epoch": 1.668711656441718, "grad_norm": 0.1924182027578354, "learning_rate": 4.974857895523136e-05, "loss": 0.9203324317932129, "step": 1360 }, { "epoch": 1.6699386503067486, "grad_norm": 0.1792140156030655, "learning_rate": 4.974786841748855e-05, "loss": 1.0177465677261353, "step": 1361 }, { "epoch": 1.6711656441717793, "grad_norm": 0.19226951897144318, "learning_rate": 4.974715688222975e-05, "loss": 1.1163201332092285, "step": 1362 }, { "epoch": 1.67239263803681, "grad_norm": 0.1622837632894516, "learning_rate": 4.9746444349483635e-05, "loss": 0.8827273845672607, "step": 1363 }, { "epoch": 1.6736196319018406, "grad_norm": 0.2630852162837982, "learning_rate": 4.9745730819278926e-05, "loss": 0.8949026465415955, "step": 1364 }, { "epoch": 1.6748466257668713, "grad_norm": 0.19422803819179535, "learning_rate": 4.974501629164438e-05, "loss": 0.9846029877662659, "step": 1365 }, { "epoch": 1.676073619631902, "grad_norm": 0.17430828511714935, "learning_rate": 4.974430076660882e-05, "loss": 1.043534278869629, "step": 1366 }, { "epoch": 1.6773006134969326, "grad_norm": 0.21537967026233673, "learning_rate": 4.974358424420106e-05, "loss": 0.8803274035453796, "step": 1367 }, { "epoch": 1.6785276073619633, "grad_norm": 0.20551061630249023, "learning_rate": 4.974286672444998e-05, "loss": 0.8408902883529663, "step": 1368 }, { "epoch": 1.679754601226994, "grad_norm": 0.18277586996555328, "learning_rate": 4.974214820738452e-05, "loss": 0.8529012203216553, "step": 1369 }, { "epoch": 1.6809815950920246, "grad_norm": 0.2194904088973999, "learning_rate": 4.974142869303363e-05, "loss": 1.076759696006775, "step": 1370 }, { "epoch": 1.6822085889570553, "grad_norm": 0.18237271904945374, "learning_rate": 4.9740708181426324e-05, "loss": 0.8302558660507202, "step": 1371 }, { "epoch": 1.683435582822086, "grad_norm": 0.19522808492183685, "learning_rate": 4.973998667259162e-05, "loss": 0.8948425054550171, "step": 1372 }, { "epoch": 1.6846625766871166, "grad_norm": 0.1836577206850052, "learning_rate": 4.973926416655862e-05, "loss": 0.9870635271072388, "step": 1373 }, { "epoch": 1.6858895705521473, "grad_norm": 0.16631397604942322, "learning_rate": 4.973854066335645e-05, "loss": 0.8610117435455322, "step": 1374 }, { "epoch": 1.687116564417178, "grad_norm": 0.1787792295217514, "learning_rate": 4.973781616301425e-05, "loss": 1.0194467306137085, "step": 1375 }, { "epoch": 1.6883435582822086, "grad_norm": 0.17316977679729462, "learning_rate": 4.9737090665561235e-05, "loss": 0.9435144662857056, "step": 1376 }, { "epoch": 1.6895705521472393, "grad_norm": 0.21574732661247253, "learning_rate": 4.9736364171026654e-05, "loss": 0.8577451705932617, "step": 1377 }, { "epoch": 1.69079754601227, "grad_norm": 0.1840669810771942, "learning_rate": 4.973563667943977e-05, "loss": 0.9301040172576904, "step": 1378 }, { "epoch": 1.6920245398773006, "grad_norm": 0.1802053302526474, "learning_rate": 4.973490819082993e-05, "loss": 0.8022339940071106, "step": 1379 }, { "epoch": 1.6932515337423313, "grad_norm": 0.18120068311691284, "learning_rate": 4.973417870522649e-05, "loss": 1.0527122020721436, "step": 1380 }, { "epoch": 1.694478527607362, "grad_norm": 0.18511131405830383, "learning_rate": 4.9733448222658845e-05, "loss": 0.9027064442634583, "step": 1381 }, { "epoch": 1.6957055214723926, "grad_norm": 0.18909253180027008, "learning_rate": 4.973271674315645e-05, "loss": 0.7118675112724304, "step": 1382 }, { "epoch": 1.6969325153374233, "grad_norm": 0.21655136346817017, "learning_rate": 4.9731984266748775e-05, "loss": 1.0265941619873047, "step": 1383 }, { "epoch": 1.698159509202454, "grad_norm": 0.19379889965057373, "learning_rate": 4.9731250793465356e-05, "loss": 0.846508264541626, "step": 1384 }, { "epoch": 1.6993865030674846, "grad_norm": 0.191395565867424, "learning_rate": 4.973051632333575e-05, "loss": 0.8975883722305298, "step": 1385 }, { "epoch": 1.7006134969325153, "grad_norm": 0.18798606097698212, "learning_rate": 4.9729780856389576e-05, "loss": 0.9934200048446655, "step": 1386 }, { "epoch": 1.701840490797546, "grad_norm": 0.14428120851516724, "learning_rate": 4.972904439265645e-05, "loss": 1.0091893672943115, "step": 1387 }, { "epoch": 1.7030674846625766, "grad_norm": 0.28410932421684265, "learning_rate": 4.9728306932166094e-05, "loss": 0.7320271730422974, "step": 1388 }, { "epoch": 1.7042944785276073, "grad_norm": 0.2038065642118454, "learning_rate": 4.972756847494819e-05, "loss": 0.8879655599594116, "step": 1389 }, { "epoch": 1.705521472392638, "grad_norm": 0.18124379217624664, "learning_rate": 4.9726829021032545e-05, "loss": 0.8120673894882202, "step": 1390 }, { "epoch": 1.7067484662576686, "grad_norm": 0.23137983679771423, "learning_rate": 4.972608857044894e-05, "loss": 0.8915911912918091, "step": 1391 }, { "epoch": 1.7079754601226993, "grad_norm": 0.19738420844078064, "learning_rate": 4.9725347123227226e-05, "loss": 0.8157975673675537, "step": 1392 }, { "epoch": 1.70920245398773, "grad_norm": 0.19759586453437805, "learning_rate": 4.972460467939728e-05, "loss": 0.7579189538955688, "step": 1393 }, { "epoch": 1.7104294478527606, "grad_norm": 0.2068149745464325, "learning_rate": 4.9723861238989045e-05, "loss": 0.9182103872299194, "step": 1394 }, { "epoch": 1.7116564417177913, "grad_norm": 0.24214251339435577, "learning_rate": 4.9723116802032474e-05, "loss": 0.7443536520004272, "step": 1395 }, { "epoch": 1.712883435582822, "grad_norm": 0.17724289000034332, "learning_rate": 4.972237136855758e-05, "loss": 0.9887142181396484, "step": 1396 }, { "epoch": 1.7141104294478526, "grad_norm": 0.17225778102874756, "learning_rate": 4.9721624938594404e-05, "loss": 0.9006113409996033, "step": 1397 }, { "epoch": 1.7153374233128833, "grad_norm": 0.19417467713356018, "learning_rate": 4.972087751217304e-05, "loss": 0.7861904501914978, "step": 1398 }, { "epoch": 1.716564417177914, "grad_norm": 0.1973291039466858, "learning_rate": 4.9720129089323607e-05, "loss": 0.8158378005027771, "step": 1399 }, { "epoch": 1.7177914110429446, "grad_norm": 0.2050553560256958, "learning_rate": 4.971937967007627e-05, "loss": 0.7763911485671997, "step": 1400 }, { "epoch": 1.7190184049079753, "grad_norm": 0.17174991965293884, "learning_rate": 4.971862925446125e-05, "loss": 0.976986289024353, "step": 1401 }, { "epoch": 1.720245398773006, "grad_norm": 0.16899751126766205, "learning_rate": 4.971787784250877e-05, "loss": 0.7443711161613464, "step": 1402 }, { "epoch": 1.7214723926380369, "grad_norm": 0.17108877003192902, "learning_rate": 4.971712543424915e-05, "loss": 1.0153160095214844, "step": 1403 }, { "epoch": 1.7226993865030675, "grad_norm": 0.18930481374263763, "learning_rate": 4.971637202971268e-05, "loss": 0.7841814756393433, "step": 1404 }, { "epoch": 1.7239263803680982, "grad_norm": 0.1852414458990097, "learning_rate": 4.971561762892976e-05, "loss": 0.8461225032806396, "step": 1405 }, { "epoch": 1.7251533742331289, "grad_norm": 0.17831310629844666, "learning_rate": 4.9714862231930784e-05, "loss": 0.9308940172195435, "step": 1406 }, { "epoch": 1.7263803680981595, "grad_norm": 0.16007459163665771, "learning_rate": 4.971410583874619e-05, "loss": 1.0668251514434814, "step": 1407 }, { "epoch": 1.7276073619631902, "grad_norm": 0.20256724953651428, "learning_rate": 4.9713348449406485e-05, "loss": 0.8975697755813599, "step": 1408 }, { "epoch": 1.7288343558282209, "grad_norm": 0.1798015683889389, "learning_rate": 4.971259006394219e-05, "loss": 1.0853426456451416, "step": 1409 }, { "epoch": 1.7300613496932515, "grad_norm": 0.19930899143218994, "learning_rate": 4.971183068238387e-05, "loss": 0.8682226538658142, "step": 1410 }, { "epoch": 1.7312883435582822, "grad_norm": 0.18173828721046448, "learning_rate": 4.971107030476213e-05, "loss": 0.9526892900466919, "step": 1411 }, { "epoch": 1.7325153374233129, "grad_norm": 0.18088404834270477, "learning_rate": 4.971030893110763e-05, "loss": 0.9224306344985962, "step": 1412 }, { "epoch": 1.7337423312883435, "grad_norm": 0.17253927886486053, "learning_rate": 4.9709546561451054e-05, "loss": 0.8963764905929565, "step": 1413 }, { "epoch": 1.7349693251533742, "grad_norm": 0.16089355945587158, "learning_rate": 4.970878319582313e-05, "loss": 0.8877451419830322, "step": 1414 }, { "epoch": 1.7361963190184049, "grad_norm": 0.18568135797977448, "learning_rate": 4.9708018834254623e-05, "loss": 0.8700766563415527, "step": 1415 }, { "epoch": 1.7374233128834355, "grad_norm": 0.1794903427362442, "learning_rate": 4.970725347677635e-05, "loss": 1.0166914463043213, "step": 1416 }, { "epoch": 1.7386503067484662, "grad_norm": 0.1760503351688385, "learning_rate": 4.970648712341915e-05, "loss": 1.0287595987319946, "step": 1417 }, { "epoch": 1.7398773006134969, "grad_norm": 0.17696420848369598, "learning_rate": 4.9705719774213924e-05, "loss": 0.9214167594909668, "step": 1418 }, { "epoch": 1.7411042944785278, "grad_norm": 0.19626584649085999, "learning_rate": 4.97049514291916e-05, "loss": 0.917699933052063, "step": 1419 }, { "epoch": 1.7423312883435584, "grad_norm": 0.164072647690773, "learning_rate": 4.970418208838314e-05, "loss": 0.8806049823760986, "step": 1420 }, { "epoch": 1.743558282208589, "grad_norm": 0.18971270322799683, "learning_rate": 4.970341175181956e-05, "loss": 0.9840154647827148, "step": 1421 }, { "epoch": 1.7447852760736198, "grad_norm": 0.1818685233592987, "learning_rate": 4.970264041953191e-05, "loss": 0.8325216770172119, "step": 1422 }, { "epoch": 1.7460122699386504, "grad_norm": 0.22311043739318848, "learning_rate": 4.970186809155128e-05, "loss": 0.8713372349739075, "step": 1423 }, { "epoch": 1.747239263803681, "grad_norm": 0.233389750123024, "learning_rate": 4.9701094767908795e-05, "loss": 0.8278717994689941, "step": 1424 }, { "epoch": 1.7484662576687118, "grad_norm": 0.19432400166988373, "learning_rate": 4.9700320448635626e-05, "loss": 0.875807523727417, "step": 1425 }, { "epoch": 1.7496932515337424, "grad_norm": 0.20572824776172638, "learning_rate": 4.969954513376299e-05, "loss": 0.8697029948234558, "step": 1426 }, { "epoch": 1.7509202453987731, "grad_norm": 0.17346598207950592, "learning_rate": 4.969876882332214e-05, "loss": 1.004837989807129, "step": 1427 }, { "epoch": 1.7521472392638038, "grad_norm": 0.1590670645236969, "learning_rate": 4.969799151734436e-05, "loss": 0.8151943683624268, "step": 1428 }, { "epoch": 1.7533742331288344, "grad_norm": 0.16744381189346313, "learning_rate": 4.9697213215860985e-05, "loss": 0.9941998720169067, "step": 1429 }, { "epoch": 1.7546012269938651, "grad_norm": 0.16515925526618958, "learning_rate": 4.969643391890338e-05, "loss": 0.9447017908096313, "step": 1430 }, { "epoch": 1.7558282208588958, "grad_norm": 0.17021630704402924, "learning_rate": 4.9695653626502957e-05, "loss": 0.8481341600418091, "step": 1431 }, { "epoch": 1.7570552147239265, "grad_norm": 0.1746109127998352, "learning_rate": 4.969487233869118e-05, "loss": 0.9539897441864014, "step": 1432 }, { "epoch": 1.7582822085889571, "grad_norm": 0.20465505123138428, "learning_rate": 4.9694090055499525e-05, "loss": 0.9486850500106812, "step": 1433 }, { "epoch": 1.7595092024539878, "grad_norm": 0.16573548316955566, "learning_rate": 4.969330677695953e-05, "loss": 0.9027010798454285, "step": 1434 }, { "epoch": 1.7607361963190185, "grad_norm": 0.19999532401561737, "learning_rate": 4.969252250310277e-05, "loss": 0.8200736045837402, "step": 1435 }, { "epoch": 1.7619631901840491, "grad_norm": 0.20762355625629425, "learning_rate": 4.9691737233960846e-05, "loss": 0.8510980606079102, "step": 1436 }, { "epoch": 1.7631901840490798, "grad_norm": 0.20922982692718506, "learning_rate": 4.969095096956542e-05, "loss": 0.9752122163772583, "step": 1437 }, { "epoch": 1.7644171779141105, "grad_norm": 0.21732710301876068, "learning_rate": 4.9690163709948186e-05, "loss": 0.7744443416595459, "step": 1438 }, { "epoch": 1.7656441717791411, "grad_norm": 0.16963250935077667, "learning_rate": 4.9689375455140866e-05, "loss": 0.9100605249404907, "step": 1439 }, { "epoch": 1.7668711656441718, "grad_norm": 0.201693594455719, "learning_rate": 4.968858620517524e-05, "loss": 0.9659551382064819, "step": 1440 }, { "epoch": 1.7680981595092025, "grad_norm": 0.1602899581193924, "learning_rate": 4.9687795960083116e-05, "loss": 0.8994865417480469, "step": 1441 }, { "epoch": 1.7693251533742331, "grad_norm": 0.18326005339622498, "learning_rate": 4.968700471989635e-05, "loss": 0.9586681127548218, "step": 1442 }, { "epoch": 1.7705521472392638, "grad_norm": 0.233284592628479, "learning_rate": 4.968621248464683e-05, "loss": 0.8470198512077332, "step": 1443 }, { "epoch": 1.7717791411042945, "grad_norm": 0.16279034316539764, "learning_rate": 4.96854192543665e-05, "loss": 0.8962979316711426, "step": 1444 }, { "epoch": 1.7730061349693251, "grad_norm": 0.22949524223804474, "learning_rate": 4.968462502908732e-05, "loss": 0.993238091468811, "step": 1445 }, { "epoch": 1.7742331288343558, "grad_norm": 0.16405397653579712, "learning_rate": 4.968382980884131e-05, "loss": 0.8893868327140808, "step": 1446 }, { "epoch": 1.7754601226993865, "grad_norm": 0.1687174290418625, "learning_rate": 4.968303359366052e-05, "loss": 1.01975679397583, "step": 1447 }, { "epoch": 1.7766871165644171, "grad_norm": 0.23544685542583466, "learning_rate": 4.968223638357704e-05, "loss": 0.9683750867843628, "step": 1448 }, { "epoch": 1.7779141104294478, "grad_norm": 0.17218413949012756, "learning_rate": 4.9681438178623005e-05, "loss": 0.9872739315032959, "step": 1449 }, { "epoch": 1.7791411042944785, "grad_norm": 0.16822843253612518, "learning_rate": 4.9680638978830593e-05, "loss": 1.06640625, "step": 1450 }, { "epoch": 1.7803680981595091, "grad_norm": 0.2543427050113678, "learning_rate": 4.967983878423202e-05, "loss": 0.8496226072311401, "step": 1451 }, { "epoch": 1.7815950920245398, "grad_norm": 0.2313835769891739, "learning_rate": 4.9679037594859534e-05, "loss": 0.9983211755752563, "step": 1452 }, { "epoch": 1.7828220858895705, "grad_norm": 0.1745442897081375, "learning_rate": 4.967823541074543e-05, "loss": 0.8975462317466736, "step": 1453 }, { "epoch": 1.7840490797546011, "grad_norm": 0.17984922230243683, "learning_rate": 4.967743223192204e-05, "loss": 0.8873209357261658, "step": 1454 }, { "epoch": 1.7852760736196318, "grad_norm": 0.2077290266752243, "learning_rate": 4.9676628058421734e-05, "loss": 0.7991093397140503, "step": 1455 }, { "epoch": 1.7865030674846625, "grad_norm": 0.18795764446258545, "learning_rate": 4.9675822890276944e-05, "loss": 0.9104118347167969, "step": 1456 }, { "epoch": 1.7877300613496931, "grad_norm": 0.19386734068393707, "learning_rate": 4.967501672752009e-05, "loss": 0.9292243719100952, "step": 1457 }, { "epoch": 1.7889570552147238, "grad_norm": 0.1770862638950348, "learning_rate": 4.96742095701837e-05, "loss": 1.0034353733062744, "step": 1458 }, { "epoch": 1.7901840490797545, "grad_norm": 0.1730252504348755, "learning_rate": 4.96734014183003e-05, "loss": 0.8876687288284302, "step": 1459 }, { "epoch": 1.7914110429447851, "grad_norm": 0.21838659048080444, "learning_rate": 4.9672592271902456e-05, "loss": 0.8257550597190857, "step": 1460 }, { "epoch": 1.7926380368098158, "grad_norm": 0.19275030493736267, "learning_rate": 4.967178213102278e-05, "loss": 0.9446018934249878, "step": 1461 }, { "epoch": 1.7938650306748465, "grad_norm": 0.1955706775188446, "learning_rate": 4.967097099569394e-05, "loss": 0.994442343711853, "step": 1462 }, { "epoch": 1.7950920245398772, "grad_norm": 0.19309383630752563, "learning_rate": 4.9670158865948625e-05, "loss": 0.9203750491142273, "step": 1463 }, { "epoch": 1.7963190184049078, "grad_norm": 0.1855948120355606, "learning_rate": 4.966934574181956e-05, "loss": 0.9189562797546387, "step": 1464 }, { "epoch": 1.7975460122699385, "grad_norm": 0.1804739087820053, "learning_rate": 4.966853162333953e-05, "loss": 0.7748914957046509, "step": 1465 }, { "epoch": 1.7987730061349694, "grad_norm": 0.17030271887779236, "learning_rate": 4.966771651054134e-05, "loss": 0.9019081592559814, "step": 1466 }, { "epoch": 1.8, "grad_norm": 0.19191472232341766, "learning_rate": 4.966690040345786e-05, "loss": 0.8309803009033203, "step": 1467 }, { "epoch": 1.8012269938650307, "grad_norm": 0.21046312153339386, "learning_rate": 4.966608330212198e-05, "loss": 0.7769635915756226, "step": 1468 }, { "epoch": 1.8024539877300614, "grad_norm": 0.20758235454559326, "learning_rate": 4.966526520656663e-05, "loss": 0.9008642435073853, "step": 1469 }, { "epoch": 1.803680981595092, "grad_norm": 0.15081936120986938, "learning_rate": 4.966444611682478e-05, "loss": 0.9577703475952148, "step": 1470 }, { "epoch": 1.8049079754601227, "grad_norm": 0.22037489712238312, "learning_rate": 4.966362603292946e-05, "loss": 0.7145841121673584, "step": 1471 }, { "epoch": 1.8061349693251534, "grad_norm": 0.18305236101150513, "learning_rate": 4.966280495491371e-05, "loss": 0.8139341473579407, "step": 1472 }, { "epoch": 1.807361963190184, "grad_norm": 0.17948433756828308, "learning_rate": 4.966198288281064e-05, "loss": 0.8765654563903809, "step": 1473 }, { "epoch": 1.8085889570552147, "grad_norm": 0.20668862760066986, "learning_rate": 4.966115981665337e-05, "loss": 0.7899892926216125, "step": 1474 }, { "epoch": 1.8098159509202454, "grad_norm": 0.18768039345741272, "learning_rate": 4.966033575647509e-05, "loss": 1.065625548362732, "step": 1475 }, { "epoch": 1.811042944785276, "grad_norm": 0.17658695578575134, "learning_rate": 4.965951070230901e-05, "loss": 0.8760710954666138, "step": 1476 }, { "epoch": 1.8122699386503067, "grad_norm": 0.18295548856258392, "learning_rate": 4.965868465418838e-05, "loss": 0.9555754661560059, "step": 1477 }, { "epoch": 1.8134969325153374, "grad_norm": 0.17984066903591156, "learning_rate": 4.965785761214651e-05, "loss": 0.9404579401016235, "step": 1478 }, { "epoch": 1.814723926380368, "grad_norm": 0.23515643179416656, "learning_rate": 4.9657029576216716e-05, "loss": 0.7723299264907837, "step": 1479 }, { "epoch": 1.8159509202453987, "grad_norm": 0.18297594785690308, "learning_rate": 4.9656200546432387e-05, "loss": 1.0695408582687378, "step": 1480 }, { "epoch": 1.8171779141104294, "grad_norm": 0.17778320610523224, "learning_rate": 4.965537052282693e-05, "loss": 0.9800499677658081, "step": 1481 }, { "epoch": 1.8184049079754603, "grad_norm": 0.24127984046936035, "learning_rate": 4.965453950543382e-05, "loss": 0.9165706634521484, "step": 1482 }, { "epoch": 1.819631901840491, "grad_norm": 0.18490642309188843, "learning_rate": 4.9653707494286525e-05, "loss": 0.9561034440994263, "step": 1483 }, { "epoch": 1.8208588957055216, "grad_norm": 0.21781449019908905, "learning_rate": 4.96528744894186e-05, "loss": 0.7549551725387573, "step": 1484 }, { "epoch": 1.8220858895705523, "grad_norm": 0.14875996112823486, "learning_rate": 4.9652040490863624e-05, "loss": 0.9284369945526123, "step": 1485 }, { "epoch": 1.823312883435583, "grad_norm": 0.16878244280815125, "learning_rate": 4.96512054986552e-05, "loss": 0.9364545345306396, "step": 1486 }, { "epoch": 1.8245398773006136, "grad_norm": 0.15773501992225647, "learning_rate": 4.965036951282699e-05, "loss": 0.9250108003616333, "step": 1487 }, { "epoch": 1.8257668711656443, "grad_norm": 0.18498362600803375, "learning_rate": 4.964953253341269e-05, "loss": 0.8930867910385132, "step": 1488 }, { "epoch": 1.826993865030675, "grad_norm": 0.21401670575141907, "learning_rate": 4.964869456044603e-05, "loss": 0.8250068426132202, "step": 1489 }, { "epoch": 1.8282208588957056, "grad_norm": 0.1685289442539215, "learning_rate": 4.96478555939608e-05, "loss": 1.096919298171997, "step": 1490 }, { "epoch": 1.8294478527607363, "grad_norm": 0.19460628926753998, "learning_rate": 4.9647015633990814e-05, "loss": 0.920468807220459, "step": 1491 }, { "epoch": 1.830674846625767, "grad_norm": 0.17131556570529938, "learning_rate": 4.9646174680569914e-05, "loss": 0.9064468145370483, "step": 1492 }, { "epoch": 1.8319018404907976, "grad_norm": 0.17492350935935974, "learning_rate": 4.964533273373201e-05, "loss": 0.9352222681045532, "step": 1493 }, { "epoch": 1.8331288343558283, "grad_norm": 0.2127833366394043, "learning_rate": 4.964448979351103e-05, "loss": 0.8069888353347778, "step": 1494 }, { "epoch": 1.834355828220859, "grad_norm": 0.18774263560771942, "learning_rate": 4.964364585994096e-05, "loss": 0.9633301496505737, "step": 1495 }, { "epoch": 1.8355828220858896, "grad_norm": 0.2015870064496994, "learning_rate": 4.964280093305581e-05, "loss": 0.9160312414169312, "step": 1496 }, { "epoch": 1.8368098159509203, "grad_norm": 0.23221443593502045, "learning_rate": 4.964195501288963e-05, "loss": 0.8378638029098511, "step": 1497 }, { "epoch": 1.838036809815951, "grad_norm": 0.17422644793987274, "learning_rate": 4.9641108099476535e-05, "loss": 0.8750184774398804, "step": 1498 }, { "epoch": 1.8392638036809816, "grad_norm": 0.1966998428106308, "learning_rate": 4.964026019285065e-05, "loss": 0.8838995695114136, "step": 1499 }, { "epoch": 1.8404907975460123, "grad_norm": 0.1833258867263794, "learning_rate": 4.963941129304615e-05, "loss": 0.8603208065032959, "step": 1500 }, { "epoch": 1.841717791411043, "grad_norm": 0.214803546667099, "learning_rate": 4.963856140009725e-05, "loss": 0.7688221335411072, "step": 1501 }, { "epoch": 1.8429447852760736, "grad_norm": 0.20921246707439423, "learning_rate": 4.9637710514038215e-05, "loss": 0.879905104637146, "step": 1502 }, { "epoch": 1.8441717791411043, "grad_norm": 0.18248675763607025, "learning_rate": 4.963685863490334e-05, "loss": 0.8165733814239502, "step": 1503 }, { "epoch": 1.845398773006135, "grad_norm": 0.1789482980966568, "learning_rate": 4.963600576272696e-05, "loss": 1.0782955884933472, "step": 1504 }, { "epoch": 1.8466257668711656, "grad_norm": 0.19839724898338318, "learning_rate": 4.9635151897543455e-05, "loss": 0.9207353591918945, "step": 1505 }, { "epoch": 1.8478527607361963, "grad_norm": 0.1649027317762375, "learning_rate": 4.9634297039387234e-05, "loss": 0.9392503499984741, "step": 1506 }, { "epoch": 1.849079754601227, "grad_norm": 0.18262377381324768, "learning_rate": 4.963344118829276e-05, "loss": 0.9475472569465637, "step": 1507 }, { "epoch": 1.8503067484662576, "grad_norm": 0.16946223378181458, "learning_rate": 4.9632584344294525e-05, "loss": 0.9143075942993164, "step": 1508 }, { "epoch": 1.8515337423312883, "grad_norm": 0.17727136611938477, "learning_rate": 4.9631726507427066e-05, "loss": 0.9670894145965576, "step": 1509 }, { "epoch": 1.852760736196319, "grad_norm": 0.19635681807994843, "learning_rate": 4.9630867677724975e-05, "loss": 0.7390514016151428, "step": 1510 }, { "epoch": 1.8539877300613496, "grad_norm": 0.16018672287464142, "learning_rate": 4.963000785522285e-05, "loss": 0.9309619069099426, "step": 1511 }, { "epoch": 1.8552147239263803, "grad_norm": 0.20592768490314484, "learning_rate": 4.962914703995536e-05, "loss": 0.7426810264587402, "step": 1512 }, { "epoch": 1.856441717791411, "grad_norm": 0.19814561307430267, "learning_rate": 4.9628285231957194e-05, "loss": 0.942278265953064, "step": 1513 }, { "epoch": 1.8576687116564417, "grad_norm": 0.1565573662519455, "learning_rate": 4.962742243126309e-05, "loss": 0.9857934713363647, "step": 1514 }, { "epoch": 1.8588957055214723, "grad_norm": 0.19160668551921844, "learning_rate": 4.962655863790783e-05, "loss": 0.8452612161636353, "step": 1515 }, { "epoch": 1.860122699386503, "grad_norm": 0.15453778207302094, "learning_rate": 4.962569385192623e-05, "loss": 0.9605095386505127, "step": 1516 }, { "epoch": 1.8613496932515337, "grad_norm": 0.24945612251758575, "learning_rate": 4.9624828073353144e-05, "loss": 0.7306519746780396, "step": 1517 }, { "epoch": 1.8625766871165643, "grad_norm": 0.19469057023525238, "learning_rate": 4.962396130222347e-05, "loss": 0.8539035320281982, "step": 1518 }, { "epoch": 1.863803680981595, "grad_norm": 0.16174711287021637, "learning_rate": 4.962309353857215e-05, "loss": 0.97934490442276, "step": 1519 }, { "epoch": 1.8650306748466257, "grad_norm": 0.17566117644309998, "learning_rate": 4.962222478243415e-05, "loss": 0.887444257736206, "step": 1520 }, { "epoch": 1.8662576687116563, "grad_norm": 0.2134261131286621, "learning_rate": 4.96213550338445e-05, "loss": 0.824278712272644, "step": 1521 }, { "epoch": 1.867484662576687, "grad_norm": 0.1517765372991562, "learning_rate": 4.962048429283824e-05, "loss": 0.9797631502151489, "step": 1522 }, { "epoch": 1.8687116564417177, "grad_norm": 0.17840948700904846, "learning_rate": 4.961961255945049e-05, "loss": 0.9797592759132385, "step": 1523 }, { "epoch": 1.8699386503067483, "grad_norm": 0.17095498740673065, "learning_rate": 4.961873983371637e-05, "loss": 0.9250251650810242, "step": 1524 }, { "epoch": 1.871165644171779, "grad_norm": 0.15412889420986176, "learning_rate": 4.961786611567106e-05, "loss": 1.0174615383148193, "step": 1525 }, { "epoch": 1.8723926380368097, "grad_norm": 0.16282574832439423, "learning_rate": 4.961699140534979e-05, "loss": 0.7446683049201965, "step": 1526 }, { "epoch": 1.8736196319018403, "grad_norm": 0.18036361038684845, "learning_rate": 4.961611570278779e-05, "loss": 0.8519653081893921, "step": 1527 }, { "epoch": 1.874846625766871, "grad_norm": 0.22247721254825592, "learning_rate": 4.9615239008020384e-05, "loss": 0.8003795146942139, "step": 1528 }, { "epoch": 1.876073619631902, "grad_norm": 0.18070849776268005, "learning_rate": 4.96143613210829e-05, "loss": 0.8841311931610107, "step": 1529 }, { "epoch": 1.8773006134969326, "grad_norm": 0.1530921906232834, "learning_rate": 4.961348264201071e-05, "loss": 1.0426563024520874, "step": 1530 }, { "epoch": 1.8785276073619632, "grad_norm": 0.20837248861789703, "learning_rate": 4.961260297083923e-05, "loss": 0.8552258014678955, "step": 1531 }, { "epoch": 1.879754601226994, "grad_norm": 0.19860149919986725, "learning_rate": 4.961172230760393e-05, "loss": 0.7021901607513428, "step": 1532 }, { "epoch": 1.8809815950920246, "grad_norm": 0.16727915406227112, "learning_rate": 4.9610840652340295e-05, "loss": 0.8286685347557068, "step": 1533 }, { "epoch": 1.8822085889570552, "grad_norm": 0.21328461170196533, "learning_rate": 4.9609958005083864e-05, "loss": 0.7684495449066162, "step": 1534 }, { "epoch": 1.883435582822086, "grad_norm": 0.16966557502746582, "learning_rate": 4.960907436587022e-05, "loss": 0.9439365863800049, "step": 1535 }, { "epoch": 1.8846625766871166, "grad_norm": 0.22845610976219177, "learning_rate": 4.9608189734734966e-05, "loss": 0.6940321922302246, "step": 1536 }, { "epoch": 1.8858895705521472, "grad_norm": 0.17308856546878815, "learning_rate": 4.960730411171378e-05, "loss": 0.8693904876708984, "step": 1537 }, { "epoch": 1.887116564417178, "grad_norm": 0.22656549513339996, "learning_rate": 4.960641749684234e-05, "loss": 0.75306236743927, "step": 1538 }, { "epoch": 1.8883435582822086, "grad_norm": 0.19471502304077148, "learning_rate": 4.9605529890156395e-05, "loss": 0.7944803237915039, "step": 1539 }, { "epoch": 1.8895705521472392, "grad_norm": 0.1931617110967636, "learning_rate": 4.9604641291691714e-05, "loss": 0.7011860609054565, "step": 1540 }, { "epoch": 1.89079754601227, "grad_norm": 0.14714878797531128, "learning_rate": 4.960375170148411e-05, "loss": 0.9628242254257202, "step": 1541 }, { "epoch": 1.8920245398773006, "grad_norm": 0.15444716811180115, "learning_rate": 4.960286111956947e-05, "loss": 0.9737740755081177, "step": 1542 }, { "epoch": 1.8932515337423312, "grad_norm": 0.19553148746490479, "learning_rate": 4.960196954598365e-05, "loss": 0.8973909616470337, "step": 1543 }, { "epoch": 1.8944785276073621, "grad_norm": 0.18491561710834503, "learning_rate": 4.960107698076261e-05, "loss": 0.9301726818084717, "step": 1544 }, { "epoch": 1.8957055214723928, "grad_norm": 0.21429064869880676, "learning_rate": 4.9600183423942314e-05, "loss": 0.7715363502502441, "step": 1545 }, { "epoch": 1.8969325153374235, "grad_norm": 0.15003728866577148, "learning_rate": 4.959928887555879e-05, "loss": 0.9092047810554504, "step": 1546 }, { "epoch": 1.8981595092024541, "grad_norm": 0.160672128200531, "learning_rate": 4.959839333564809e-05, "loss": 0.9670131206512451, "step": 1547 }, { "epoch": 1.8993865030674848, "grad_norm": 0.20472607016563416, "learning_rate": 4.9597496804246314e-05, "loss": 0.8595390319824219, "step": 1548 }, { "epoch": 1.9006134969325155, "grad_norm": 0.16051124036312103, "learning_rate": 4.95965992813896e-05, "loss": 1.0236680507659912, "step": 1549 }, { "epoch": 1.9018404907975461, "grad_norm": 0.17889274656772614, "learning_rate": 4.959570076711411e-05, "loss": 0.9005031585693359, "step": 1550 }, { "epoch": 1.9030674846625768, "grad_norm": 0.19357141852378845, "learning_rate": 4.959480126145608e-05, "loss": 0.9002456665039062, "step": 1551 }, { "epoch": 1.9042944785276075, "grad_norm": 0.19703778624534607, "learning_rate": 4.9593900764451754e-05, "loss": 0.9661865234375, "step": 1552 }, { "epoch": 1.9055214723926381, "grad_norm": 0.17217668890953064, "learning_rate": 4.959299927613743e-05, "loss": 0.8984997272491455, "step": 1553 }, { "epoch": 1.9067484662576688, "grad_norm": 0.16432301700115204, "learning_rate": 4.9592096796549456e-05, "loss": 0.9212000370025635, "step": 1554 }, { "epoch": 1.9079754601226995, "grad_norm": 0.17083582282066345, "learning_rate": 4.95911933257242e-05, "loss": 0.7747185826301575, "step": 1555 }, { "epoch": 1.9092024539877301, "grad_norm": 0.1746944934129715, "learning_rate": 4.959028886369806e-05, "loss": 0.800330638885498, "step": 1556 }, { "epoch": 1.9104294478527608, "grad_norm": 0.19548843801021576, "learning_rate": 4.9589383410507515e-05, "loss": 0.8206184506416321, "step": 1557 }, { "epoch": 1.9116564417177915, "grad_norm": 0.18770486116409302, "learning_rate": 4.9588476966189066e-05, "loss": 0.9147030115127563, "step": 1558 }, { "epoch": 1.9128834355828221, "grad_norm": 0.1830611675977707, "learning_rate": 4.958756953077923e-05, "loss": 0.9939022064208984, "step": 1559 }, { "epoch": 1.9141104294478528, "grad_norm": 0.2212991565465927, "learning_rate": 4.9586661104314605e-05, "loss": 0.8002225756645203, "step": 1560 }, { "epoch": 1.9153374233128835, "grad_norm": 0.17702089250087738, "learning_rate": 4.9585751686831785e-05, "loss": 0.9197590947151184, "step": 1561 }, { "epoch": 1.9165644171779141, "grad_norm": 0.17594580352306366, "learning_rate": 4.9584841278367436e-05, "loss": 0.9223810434341431, "step": 1562 }, { "epoch": 1.9177914110429448, "grad_norm": 0.1538672298192978, "learning_rate": 4.9583929878958247e-05, "loss": 0.9740074872970581, "step": 1563 }, { "epoch": 1.9190184049079755, "grad_norm": 0.20818589627742767, "learning_rate": 4.9583017488640964e-05, "loss": 0.8272386789321899, "step": 1564 }, { "epoch": 1.9202453987730062, "grad_norm": 0.14346350729465485, "learning_rate": 4.958210410745236e-05, "loss": 0.974341869354248, "step": 1565 }, { "epoch": 1.9214723926380368, "grad_norm": 0.1756288856267929, "learning_rate": 4.958118973542926e-05, "loss": 0.942797064781189, "step": 1566 }, { "epoch": 1.9226993865030675, "grad_norm": 0.15204842388629913, "learning_rate": 4.958027437260849e-05, "loss": 0.9583417177200317, "step": 1567 }, { "epoch": 1.9239263803680982, "grad_norm": 0.17730620503425598, "learning_rate": 4.9579358019026976e-05, "loss": 1.0366731882095337, "step": 1568 }, { "epoch": 1.9251533742331288, "grad_norm": 0.19271793961524963, "learning_rate": 4.957844067472164e-05, "loss": 0.8701282739639282, "step": 1569 }, { "epoch": 1.9263803680981595, "grad_norm": 0.17598260939121246, "learning_rate": 4.957752233972946e-05, "loss": 0.9642670154571533, "step": 1570 }, { "epoch": 1.9276073619631902, "grad_norm": 0.18501995503902435, "learning_rate": 4.957660301408746e-05, "loss": 0.8530995845794678, "step": 1571 }, { "epoch": 1.9288343558282208, "grad_norm": 0.17495594918727875, "learning_rate": 4.957568269783268e-05, "loss": 0.8996613025665283, "step": 1572 }, { "epoch": 1.9300613496932515, "grad_norm": 0.18126291036605835, "learning_rate": 4.9574761391002225e-05, "loss": 0.913521409034729, "step": 1573 }, { "epoch": 1.9312883435582822, "grad_norm": 0.1795986145734787, "learning_rate": 4.957383909363322e-05, "loss": 0.9078181385993958, "step": 1574 }, { "epoch": 1.9325153374233128, "grad_norm": 0.16991032660007477, "learning_rate": 4.957291580576287e-05, "loss": 0.9249233603477478, "step": 1575 }, { "epoch": 1.9337423312883435, "grad_norm": 0.16671553254127502, "learning_rate": 4.957199152742834e-05, "loss": 0.7964931130409241, "step": 1576 }, { "epoch": 1.9349693251533742, "grad_norm": 0.1705479472875595, "learning_rate": 4.957106625866693e-05, "loss": 0.8568717241287231, "step": 1577 }, { "epoch": 1.9361963190184048, "grad_norm": 0.16573897004127502, "learning_rate": 4.9570139999515915e-05, "loss": 0.9818302392959595, "step": 1578 }, { "epoch": 1.9374233128834355, "grad_norm": 0.16496293246746063, "learning_rate": 4.956921275001263e-05, "loss": 0.9077153205871582, "step": 1579 }, { "epoch": 1.9386503067484662, "grad_norm": 0.2277350127696991, "learning_rate": 4.9568284510194454e-05, "loss": 0.8434136509895325, "step": 1580 }, { "epoch": 1.9398773006134968, "grad_norm": 0.17750298976898193, "learning_rate": 4.95673552800988e-05, "loss": 0.9872832298278809, "step": 1581 }, { "epoch": 1.9411042944785275, "grad_norm": 0.2131441831588745, "learning_rate": 4.956642505976312e-05, "loss": 0.9801127910614014, "step": 1582 }, { "epoch": 1.9423312883435582, "grad_norm": 0.199015274643898, "learning_rate": 4.956549384922492e-05, "loss": 0.8954943418502808, "step": 1583 }, { "epoch": 1.9435582822085888, "grad_norm": 0.2089022696018219, "learning_rate": 4.956456164852172e-05, "loss": 0.7487335205078125, "step": 1584 }, { "epoch": 1.9447852760736195, "grad_norm": 0.17446471750736237, "learning_rate": 4.95636284576911e-05, "loss": 0.8672791123390198, "step": 1585 }, { "epoch": 1.9460122699386502, "grad_norm": 0.1961076557636261, "learning_rate": 4.9562694276770674e-05, "loss": 0.8640552163124084, "step": 1586 }, { "epoch": 1.9472392638036808, "grad_norm": 0.18534518778324127, "learning_rate": 4.95617591057981e-05, "loss": 0.8093466758728027, "step": 1587 }, { "epoch": 1.9484662576687115, "grad_norm": 0.19104571640491486, "learning_rate": 4.9560822944811065e-05, "loss": 0.858709454536438, "step": 1588 }, { "epoch": 1.9496932515337422, "grad_norm": 0.17889824509620667, "learning_rate": 4.9559885793847307e-05, "loss": 0.8753912448883057, "step": 1589 }, { "epoch": 1.9509202453987728, "grad_norm": 0.17449799180030823, "learning_rate": 4.9558947652944596e-05, "loss": 0.7751940488815308, "step": 1590 }, { "epoch": 1.9521472392638037, "grad_norm": 0.19970138370990753, "learning_rate": 4.9558008522140766e-05, "loss": 0.898255467414856, "step": 1591 }, { "epoch": 1.9533742331288344, "grad_norm": 0.21027308702468872, "learning_rate": 4.955706840147364e-05, "loss": 0.8028978705406189, "step": 1592 }, { "epoch": 1.954601226993865, "grad_norm": 0.20557639002799988, "learning_rate": 4.9556127290981125e-05, "loss": 0.8730930685997009, "step": 1593 }, { "epoch": 1.9558282208588957, "grad_norm": 0.17428115010261536, "learning_rate": 4.955518519070116e-05, "loss": 0.8609851598739624, "step": 1594 }, { "epoch": 1.9570552147239264, "grad_norm": 0.173932284116745, "learning_rate": 4.955424210067171e-05, "loss": 1.0425463914871216, "step": 1595 }, { "epoch": 1.958282208588957, "grad_norm": 0.19946575164794922, "learning_rate": 4.9553298020930794e-05, "loss": 0.851524829864502, "step": 1596 }, { "epoch": 1.9595092024539877, "grad_norm": 0.19051168859004974, "learning_rate": 4.9552352951516466e-05, "loss": 0.7984667420387268, "step": 1597 }, { "epoch": 1.9607361963190184, "grad_norm": 0.15322986245155334, "learning_rate": 4.955140689246681e-05, "loss": 0.8105825185775757, "step": 1598 }, { "epoch": 1.961963190184049, "grad_norm": 0.19260355830192566, "learning_rate": 4.955045984381997e-05, "loss": 0.9629879593849182, "step": 1599 }, { "epoch": 1.9631901840490797, "grad_norm": 0.17733247578144073, "learning_rate": 4.9549511805614114e-05, "loss": 0.8102517127990723, "step": 1600 }, { "epoch": 1.9644171779141104, "grad_norm": 0.21954244375228882, "learning_rate": 4.954856277788745e-05, "loss": 0.7848760485649109, "step": 1601 }, { "epoch": 1.965644171779141, "grad_norm": 0.16174381971359253, "learning_rate": 4.9547612760678236e-05, "loss": 0.7993963956832886, "step": 1602 }, { "epoch": 1.9668711656441717, "grad_norm": 0.158223956823349, "learning_rate": 4.954666175402476e-05, "loss": 0.9370762705802917, "step": 1603 }, { "epoch": 1.9680981595092024, "grad_norm": 0.19703014194965363, "learning_rate": 4.9545709757965366e-05, "loss": 0.7869076728820801, "step": 1604 }, { "epoch": 1.969325153374233, "grad_norm": 0.17615534365177155, "learning_rate": 4.954475677253841e-05, "loss": 0.830022931098938, "step": 1605 }, { "epoch": 1.9705521472392638, "grad_norm": 0.17077799141407013, "learning_rate": 4.954380279778232e-05, "loss": 0.9707362651824951, "step": 1606 }, { "epoch": 1.9717791411042946, "grad_norm": 0.2131909728050232, "learning_rate": 4.954284783373554e-05, "loss": 0.8908424377441406, "step": 1607 }, { "epoch": 1.9730061349693253, "grad_norm": 0.19436518847942352, "learning_rate": 4.954189188043656e-05, "loss": 0.9089633226394653, "step": 1608 }, { "epoch": 1.974233128834356, "grad_norm": 0.162260040640831, "learning_rate": 4.954093493792392e-05, "loss": 0.8734192848205566, "step": 1609 }, { "epoch": 1.9754601226993866, "grad_norm": 0.17798344790935516, "learning_rate": 4.9539977006236175e-05, "loss": 0.8313127756118774, "step": 1610 }, { "epoch": 1.9766871165644173, "grad_norm": 0.18377558887004852, "learning_rate": 4.9539018085411953e-05, "loss": 0.9267680644989014, "step": 1611 }, { "epoch": 1.977914110429448, "grad_norm": 0.19320173561573029, "learning_rate": 4.9538058175489896e-05, "loss": 0.7250254154205322, "step": 1612 }, { "epoch": 1.9791411042944786, "grad_norm": 0.16717851161956787, "learning_rate": 4.9537097276508704e-05, "loss": 0.8441028594970703, "step": 1613 }, { "epoch": 1.9803680981595093, "grad_norm": 0.1923324167728424, "learning_rate": 4.95361353885071e-05, "loss": 1.050743818283081, "step": 1614 }, { "epoch": 1.98159509202454, "grad_norm": 0.17414087057113647, "learning_rate": 4.953517251152385e-05, "loss": 1.0293493270874023, "step": 1615 }, { "epoch": 1.9828220858895707, "grad_norm": 0.19824498891830444, "learning_rate": 4.9534208645597785e-05, "loss": 1.0100815296173096, "step": 1616 }, { "epoch": 1.9840490797546013, "grad_norm": 0.1972290575504303, "learning_rate": 4.953324379076773e-05, "loss": 0.9363586902618408, "step": 1617 }, { "epoch": 1.985276073619632, "grad_norm": 0.2718610167503357, "learning_rate": 4.9532277947072604e-05, "loss": 0.9047354459762573, "step": 1618 }, { "epoch": 1.9865030674846627, "grad_norm": 0.19859041273593903, "learning_rate": 4.9531311114551314e-05, "loss": 0.6937170624732971, "step": 1619 }, { "epoch": 1.9877300613496933, "grad_norm": 0.1990155577659607, "learning_rate": 4.953034329324284e-05, "loss": 0.8925957083702087, "step": 1620 }, { "epoch": 1.988957055214724, "grad_norm": 0.18618212640285492, "learning_rate": 4.9529374483186193e-05, "loss": 0.8521758913993835, "step": 1621 }, { "epoch": 1.9901840490797547, "grad_norm": 0.17921239137649536, "learning_rate": 4.952840468442042e-05, "loss": 0.9639671444892883, "step": 1622 }, { "epoch": 1.9914110429447853, "grad_norm": 0.19817902147769928, "learning_rate": 4.95274338969846e-05, "loss": 0.7283823490142822, "step": 1623 }, { "epoch": 1.992638036809816, "grad_norm": 0.1847476363182068, "learning_rate": 4.952646212091789e-05, "loss": 0.8718445301055908, "step": 1624 }, { "epoch": 1.9938650306748467, "grad_norm": 0.23159126937389374, "learning_rate": 4.952548935625944e-05, "loss": 0.7852036356925964, "step": 1625 }, { "epoch": 1.9950920245398773, "grad_norm": 0.1849365085363388, "learning_rate": 4.952451560304845e-05, "loss": 0.7992929220199585, "step": 1626 }, { "epoch": 1.996319018404908, "grad_norm": 0.18027223646640778, "learning_rate": 4.95235408613242e-05, "loss": 0.842436671257019, "step": 1627 }, { "epoch": 1.9975460122699387, "grad_norm": 0.20304088294506073, "learning_rate": 4.9522565131125955e-05, "loss": 0.9866443872451782, "step": 1628 }, { "epoch": 1.9987730061349693, "grad_norm": 0.19005103409290314, "learning_rate": 4.952158841249304e-05, "loss": 0.8135406970977783, "step": 1629 }, { "epoch": 2.0, "grad_norm": 0.2945099472999573, "learning_rate": 4.952061070546484e-05, "loss": 0.8223719000816345, "step": 1630 }, { "epoch": 2.0012269938650307, "grad_norm": 0.1521686613559723, "learning_rate": 4.951963201008076e-05, "loss": 0.8781531453132629, "step": 1631 }, { "epoch": 2.0024539877300613, "grad_norm": 0.17374630272388458, "learning_rate": 4.951865232638025e-05, "loss": 0.9925518035888672, "step": 1632 }, { "epoch": 2.003680981595092, "grad_norm": 0.1826077252626419, "learning_rate": 4.9517671654402784e-05, "loss": 0.7848345041275024, "step": 1633 }, { "epoch": 2.0049079754601227, "grad_norm": 0.15286847949028015, "learning_rate": 4.951668999418791e-05, "loss": 1.005510687828064, "step": 1634 }, { "epoch": 2.0061349693251533, "grad_norm": 0.18204835057258606, "learning_rate": 4.951570734577518e-05, "loss": 1.0069103240966797, "step": 1635 }, { "epoch": 2.007361963190184, "grad_norm": 0.18207129836082458, "learning_rate": 4.951472370920421e-05, "loss": 0.8449172973632812, "step": 1636 }, { "epoch": 2.0085889570552147, "grad_norm": 0.20340217649936676, "learning_rate": 4.9513739084514644e-05, "loss": 0.8686966896057129, "step": 1637 }, { "epoch": 2.0098159509202453, "grad_norm": 0.17932139337062836, "learning_rate": 4.951275347174617e-05, "loss": 0.7879431247711182, "step": 1638 }, { "epoch": 2.011042944785276, "grad_norm": 0.2136053591966629, "learning_rate": 4.9511766870938524e-05, "loss": 0.9831984043121338, "step": 1639 }, { "epoch": 2.0122699386503067, "grad_norm": 0.2038228064775467, "learning_rate": 4.951077928213146e-05, "loss": 0.8511407375335693, "step": 1640 }, { "epoch": 2.0134969325153373, "grad_norm": 0.213422030210495, "learning_rate": 4.950979070536479e-05, "loss": 0.8348771333694458, "step": 1641 }, { "epoch": 2.014723926380368, "grad_norm": 0.18444831669330597, "learning_rate": 4.9508801140678355e-05, "loss": 0.8992657661437988, "step": 1642 }, { "epoch": 2.0159509202453987, "grad_norm": 0.17127497494220734, "learning_rate": 4.950781058811205e-05, "loss": 0.9895972013473511, "step": 1643 }, { "epoch": 2.0171779141104293, "grad_norm": 0.176238551735878, "learning_rate": 4.950681904770581e-05, "loss": 0.8040411472320557, "step": 1644 }, { "epoch": 2.01840490797546, "grad_norm": 0.22370481491088867, "learning_rate": 4.950582651949958e-05, "loss": 0.7989240884780884, "step": 1645 }, { "epoch": 2.0196319018404907, "grad_norm": 0.2280738204717636, "learning_rate": 4.950483300353337e-05, "loss": 0.6978750228881836, "step": 1646 }, { "epoch": 2.0208588957055214, "grad_norm": 0.17136648297309875, "learning_rate": 4.9503838499847237e-05, "loss": 0.9405577778816223, "step": 1647 }, { "epoch": 2.022085889570552, "grad_norm": 0.20736786723136902, "learning_rate": 4.950284300848127e-05, "loss": 0.864676833152771, "step": 1648 }, { "epoch": 2.0233128834355827, "grad_norm": 0.18180687725543976, "learning_rate": 4.950184652947557e-05, "loss": 0.840674877166748, "step": 1649 }, { "epoch": 2.0245398773006134, "grad_norm": 0.200373113155365, "learning_rate": 4.950084906287032e-05, "loss": 0.7555365562438965, "step": 1650 }, { "epoch": 2.025766871165644, "grad_norm": 0.2095939815044403, "learning_rate": 4.949985060870573e-05, "loss": 0.6740462183952332, "step": 1651 }, { "epoch": 2.0269938650306747, "grad_norm": 0.22072924673557281, "learning_rate": 4.949885116702204e-05, "loss": 0.8035053610801697, "step": 1652 }, { "epoch": 2.0282208588957054, "grad_norm": 0.21316197514533997, "learning_rate": 4.949785073785953e-05, "loss": 0.8779615759849548, "step": 1653 }, { "epoch": 2.029447852760736, "grad_norm": 0.23401542007923126, "learning_rate": 4.9496849321258534e-05, "loss": 0.9226788282394409, "step": 1654 }, { "epoch": 2.0306748466257667, "grad_norm": 0.1642349511384964, "learning_rate": 4.949584691725939e-05, "loss": 0.9456343650817871, "step": 1655 }, { "epoch": 2.0319018404907974, "grad_norm": 0.17017386853694916, "learning_rate": 4.949484352590253e-05, "loss": 1.079079031944275, "step": 1656 }, { "epoch": 2.033128834355828, "grad_norm": 0.20734520256519318, "learning_rate": 4.949383914722839e-05, "loss": 0.7097644209861755, "step": 1657 }, { "epoch": 2.0343558282208587, "grad_norm": 0.24555912613868713, "learning_rate": 4.949283378127746e-05, "loss": 0.736696720123291, "step": 1658 }, { "epoch": 2.0355828220858894, "grad_norm": 0.1912710964679718, "learning_rate": 4.9491827428090255e-05, "loss": 0.8878364562988281, "step": 1659 }, { "epoch": 2.03680981595092, "grad_norm": 0.17265860736370087, "learning_rate": 4.9490820087707326e-05, "loss": 0.9179648160934448, "step": 1660 }, { "epoch": 2.038036809815951, "grad_norm": 0.16571366786956787, "learning_rate": 4.9489811760169304e-05, "loss": 0.897253692150116, "step": 1661 }, { "epoch": 2.039263803680982, "grad_norm": 0.1944453865289688, "learning_rate": 4.9488802445516804e-05, "loss": 0.9878848791122437, "step": 1662 }, { "epoch": 2.0404907975460125, "grad_norm": 0.18712405860424042, "learning_rate": 4.9487792143790526e-05, "loss": 0.9314237833023071, "step": 1663 }, { "epoch": 2.041717791411043, "grad_norm": 0.17743439972400665, "learning_rate": 4.948678085503119e-05, "loss": 0.9599044322967529, "step": 1664 }, { "epoch": 2.042944785276074, "grad_norm": 0.2099919468164444, "learning_rate": 4.948576857927956e-05, "loss": 0.7470840215682983, "step": 1665 }, { "epoch": 2.0441717791411045, "grad_norm": 0.191939115524292, "learning_rate": 4.9484755316576424e-05, "loss": 1.0091993808746338, "step": 1666 }, { "epoch": 2.045398773006135, "grad_norm": 0.22321666777133942, "learning_rate": 4.9483741066962644e-05, "loss": 0.7508227825164795, "step": 1667 }, { "epoch": 2.046625766871166, "grad_norm": 0.2174069732427597, "learning_rate": 4.9482725830479086e-05, "loss": 0.9171335697174072, "step": 1668 }, { "epoch": 2.0478527607361965, "grad_norm": 0.197279691696167, "learning_rate": 4.948170960716668e-05, "loss": 0.7182982563972473, "step": 1669 }, { "epoch": 2.049079754601227, "grad_norm": 0.1969626545906067, "learning_rate": 4.9480692397066386e-05, "loss": 0.9526978135108948, "step": 1670 }, { "epoch": 2.050306748466258, "grad_norm": 0.18008632957935333, "learning_rate": 4.94796742002192e-05, "loss": 0.8143914937973022, "step": 1671 }, { "epoch": 2.0515337423312885, "grad_norm": 0.22377322614192963, "learning_rate": 4.947865501666616e-05, "loss": 0.8019245266914368, "step": 1672 }, { "epoch": 2.052760736196319, "grad_norm": 0.20660145580768585, "learning_rate": 4.9477634846448354e-05, "loss": 0.7157447338104248, "step": 1673 }, { "epoch": 2.05398773006135, "grad_norm": 0.21560384333133698, "learning_rate": 4.9476613689606896e-05, "loss": 0.8756164908409119, "step": 1674 }, { "epoch": 2.0552147239263805, "grad_norm": 0.17721278965473175, "learning_rate": 4.947559154618296e-05, "loss": 0.8824244737625122, "step": 1675 }, { "epoch": 2.056441717791411, "grad_norm": 0.23694707453250885, "learning_rate": 4.947456841621773e-05, "loss": 0.73320472240448, "step": 1676 }, { "epoch": 2.057668711656442, "grad_norm": 0.19862566888332367, "learning_rate": 4.947354429975245e-05, "loss": 0.8377892374992371, "step": 1677 }, { "epoch": 2.0588957055214725, "grad_norm": 0.2807924747467041, "learning_rate": 4.94725191968284e-05, "loss": 0.7460290193557739, "step": 1678 }, { "epoch": 2.060122699386503, "grad_norm": 0.22929534316062927, "learning_rate": 4.94714931074869e-05, "loss": 0.6831473708152771, "step": 1679 }, { "epoch": 2.061349693251534, "grad_norm": 0.16616696119308472, "learning_rate": 4.9470466031769306e-05, "loss": 0.933701753616333, "step": 1680 }, { "epoch": 2.0625766871165645, "grad_norm": 0.2023056149482727, "learning_rate": 4.946943796971702e-05, "loss": 0.9454199075698853, "step": 1681 }, { "epoch": 2.063803680981595, "grad_norm": 0.15178856253623962, "learning_rate": 4.946840892137148e-05, "loss": 0.9307025671005249, "step": 1682 }, { "epoch": 2.065030674846626, "grad_norm": 0.1891602724790573, "learning_rate": 4.946737888677416e-05, "loss": 0.8457355499267578, "step": 1683 }, { "epoch": 2.0662576687116565, "grad_norm": 0.18508054316043854, "learning_rate": 4.946634786596658e-05, "loss": 1.034746766090393, "step": 1684 }, { "epoch": 2.067484662576687, "grad_norm": 0.1761503964662552, "learning_rate": 4.9465315858990305e-05, "loss": 0.7929325103759766, "step": 1685 }, { "epoch": 2.068711656441718, "grad_norm": 0.19135379791259766, "learning_rate": 4.9464282865886916e-05, "loss": 0.9146130084991455, "step": 1686 }, { "epoch": 2.0699386503067485, "grad_norm": 0.17698673903942108, "learning_rate": 4.946324888669805e-05, "loss": 1.0091357231140137, "step": 1687 }, { "epoch": 2.071165644171779, "grad_norm": 0.186275914311409, "learning_rate": 4.9462213921465416e-05, "loss": 0.8714848756790161, "step": 1688 }, { "epoch": 2.07239263803681, "grad_norm": 0.18283571302890778, "learning_rate": 4.9461177970230695e-05, "loss": 0.9213558435440063, "step": 1689 }, { "epoch": 2.0736196319018405, "grad_norm": 0.21799436211585999, "learning_rate": 4.946014103303566e-05, "loss": 0.7548530101776123, "step": 1690 }, { "epoch": 2.074846625766871, "grad_norm": 0.15944938361644745, "learning_rate": 4.945910310992209e-05, "loss": 0.9390072822570801, "step": 1691 }, { "epoch": 2.076073619631902, "grad_norm": 0.20807668566703796, "learning_rate": 4.945806420093184e-05, "loss": 0.9734139442443848, "step": 1692 }, { "epoch": 2.0773006134969325, "grad_norm": 0.24932053685188293, "learning_rate": 4.945702430610678e-05, "loss": 0.9029139876365662, "step": 1693 }, { "epoch": 2.078527607361963, "grad_norm": 0.2192278951406479, "learning_rate": 4.945598342548883e-05, "loss": 0.8422669172286987, "step": 1694 }, { "epoch": 2.079754601226994, "grad_norm": 0.20364372432231903, "learning_rate": 4.945494155911993e-05, "loss": 0.8304034471511841, "step": 1695 }, { "epoch": 2.0809815950920245, "grad_norm": 0.21961647272109985, "learning_rate": 4.945389870704209e-05, "loss": 0.8562206029891968, "step": 1696 }, { "epoch": 2.082208588957055, "grad_norm": 0.17311997711658478, "learning_rate": 4.945285486929733e-05, "loss": 0.9726505279541016, "step": 1697 }, { "epoch": 2.083435582822086, "grad_norm": 0.1777816116809845, "learning_rate": 4.945181004592774e-05, "loss": 0.8354659080505371, "step": 1698 }, { "epoch": 2.0846625766871165, "grad_norm": 0.17294231057167053, "learning_rate": 4.945076423697542e-05, "loss": 1.0571879148483276, "step": 1699 }, { "epoch": 2.085889570552147, "grad_norm": 0.17461422085762024, "learning_rate": 4.944971744248253e-05, "loss": 1.0108742713928223, "step": 1700 }, { "epoch": 2.087116564417178, "grad_norm": 0.20306828618049622, "learning_rate": 4.944866966249126e-05, "loss": 0.6880764961242676, "step": 1701 }, { "epoch": 2.0883435582822085, "grad_norm": 0.1975422352552414, "learning_rate": 4.944762089704386e-05, "loss": 0.8344519138336182, "step": 1702 }, { "epoch": 2.089570552147239, "grad_norm": 0.20298948884010315, "learning_rate": 4.9446571146182575e-05, "loss": 0.8106217384338379, "step": 1703 }, { "epoch": 2.09079754601227, "grad_norm": 0.2012016624212265, "learning_rate": 4.944552040994973e-05, "loss": 0.8371896743774414, "step": 1704 }, { "epoch": 2.0920245398773005, "grad_norm": 0.17609119415283203, "learning_rate": 4.944446868838768e-05, "loss": 0.8801376819610596, "step": 1705 }, { "epoch": 2.093251533742331, "grad_norm": 0.18261933326721191, "learning_rate": 4.944341598153882e-05, "loss": 0.8449097871780396, "step": 1706 }, { "epoch": 2.094478527607362, "grad_norm": 0.17917321622371674, "learning_rate": 4.9442362289445573e-05, "loss": 0.8331921100616455, "step": 1707 }, { "epoch": 2.0957055214723925, "grad_norm": 0.15865972638130188, "learning_rate": 4.944130761215041e-05, "loss": 0.8956501483917236, "step": 1708 }, { "epoch": 2.096932515337423, "grad_norm": 0.18717347085475922, "learning_rate": 4.944025194969586e-05, "loss": 0.8281142711639404, "step": 1709 }, { "epoch": 2.098159509202454, "grad_norm": 0.22618184983730316, "learning_rate": 4.943919530212445e-05, "loss": 0.7792078852653503, "step": 1710 }, { "epoch": 2.0993865030674845, "grad_norm": 0.19780519604682922, "learning_rate": 4.943813766947878e-05, "loss": 0.9436835050582886, "step": 1711 }, { "epoch": 2.100613496932515, "grad_norm": 0.22660313546657562, "learning_rate": 4.9437079051801476e-05, "loss": 0.8243879675865173, "step": 1712 }, { "epoch": 2.101840490797546, "grad_norm": 0.16560526192188263, "learning_rate": 4.943601944913522e-05, "loss": 0.846989631652832, "step": 1713 }, { "epoch": 2.1030674846625765, "grad_norm": 0.2113720327615738, "learning_rate": 4.943495886152271e-05, "loss": 0.936052143573761, "step": 1714 }, { "epoch": 2.104294478527607, "grad_norm": 0.18506301939487457, "learning_rate": 4.94338972890067e-05, "loss": 0.8983745574951172, "step": 1715 }, { "epoch": 2.105521472392638, "grad_norm": 0.15955255925655365, "learning_rate": 4.943283473162997e-05, "loss": 0.9274181127548218, "step": 1716 }, { "epoch": 2.1067484662576685, "grad_norm": 0.2550275921821594, "learning_rate": 4.943177118943537e-05, "loss": 0.7646840810775757, "step": 1717 }, { "epoch": 2.107975460122699, "grad_norm": 0.22377550601959229, "learning_rate": 4.9430706662465746e-05, "loss": 0.7858836054801941, "step": 1718 }, { "epoch": 2.10920245398773, "grad_norm": 0.18213807046413422, "learning_rate": 4.9429641150764015e-05, "loss": 0.9623993635177612, "step": 1719 }, { "epoch": 2.1104294478527605, "grad_norm": 0.1827125996351242, "learning_rate": 4.9428574654373134e-05, "loss": 0.9368500709533691, "step": 1720 }, { "epoch": 2.111656441717791, "grad_norm": 0.20160911977291107, "learning_rate": 4.942750717333608e-05, "loss": 0.8806395530700684, "step": 1721 }, { "epoch": 2.112883435582822, "grad_norm": 0.21810618042945862, "learning_rate": 4.942643870769587e-05, "loss": 0.7568444013595581, "step": 1722 }, { "epoch": 2.1141104294478525, "grad_norm": 0.19544316828250885, "learning_rate": 4.942536925749559e-05, "loss": 0.8192753195762634, "step": 1723 }, { "epoch": 2.1153374233128837, "grad_norm": 0.17238681018352509, "learning_rate": 4.942429882277834e-05, "loss": 0.900439977645874, "step": 1724 }, { "epoch": 2.116564417177914, "grad_norm": 0.20676706731319427, "learning_rate": 4.942322740358726e-05, "loss": 0.965875506401062, "step": 1725 }, { "epoch": 2.117791411042945, "grad_norm": 0.19458384811878204, "learning_rate": 4.9422154999965535e-05, "loss": 0.8489663004875183, "step": 1726 }, { "epoch": 2.1190184049079757, "grad_norm": 0.17645421624183655, "learning_rate": 4.94210816119564e-05, "loss": 0.9402665495872498, "step": 1727 }, { "epoch": 2.1202453987730063, "grad_norm": 0.19172224402427673, "learning_rate": 4.942000723960313e-05, "loss": 0.9269077777862549, "step": 1728 }, { "epoch": 2.121472392638037, "grad_norm": 0.19958554208278656, "learning_rate": 4.9418931882949e-05, "loss": 0.8454971313476562, "step": 1729 }, { "epoch": 2.1226993865030677, "grad_norm": 0.19598406553268433, "learning_rate": 4.941785554203738e-05, "loss": 0.8839153051376343, "step": 1730 }, { "epoch": 2.1239263803680983, "grad_norm": 0.20366765558719635, "learning_rate": 4.9416778216911644e-05, "loss": 0.9746721982955933, "step": 1731 }, { "epoch": 2.125153374233129, "grad_norm": 0.21722380816936493, "learning_rate": 4.941569990761521e-05, "loss": 0.8112891912460327, "step": 1732 }, { "epoch": 2.1263803680981597, "grad_norm": 0.21256732940673828, "learning_rate": 4.941462061419155e-05, "loss": 0.8895362615585327, "step": 1733 }, { "epoch": 2.1276073619631903, "grad_norm": 0.18714895844459534, "learning_rate": 4.941354033668417e-05, "loss": 0.8519076108932495, "step": 1734 }, { "epoch": 2.128834355828221, "grad_norm": 0.1773018091917038, "learning_rate": 4.941245907513661e-05, "loss": 0.9292235374450684, "step": 1735 }, { "epoch": 2.1300613496932517, "grad_norm": 0.18910489976406097, "learning_rate": 4.941137682959245e-05, "loss": 0.8214740753173828, "step": 1736 }, { "epoch": 2.1312883435582823, "grad_norm": 0.2003478854894638, "learning_rate": 4.941029360009531e-05, "loss": 0.9072068929672241, "step": 1737 }, { "epoch": 2.132515337423313, "grad_norm": 0.22378408908843994, "learning_rate": 4.9409209386688856e-05, "loss": 0.8725747466087341, "step": 1738 }, { "epoch": 2.1337423312883437, "grad_norm": 0.20957589149475098, "learning_rate": 4.9408124189416785e-05, "loss": 0.8974407315254211, "step": 1739 }, { "epoch": 2.1349693251533743, "grad_norm": 0.18038751184940338, "learning_rate": 4.940703800832285e-05, "loss": 1.00519597530365, "step": 1740 }, { "epoch": 2.136196319018405, "grad_norm": 0.2395613193511963, "learning_rate": 4.940595084345082e-05, "loss": 0.7963145971298218, "step": 1741 }, { "epoch": 2.1374233128834357, "grad_norm": 0.20691628754138947, "learning_rate": 4.940486269484452e-05, "loss": 0.922330915927887, "step": 1742 }, { "epoch": 2.1386503067484663, "grad_norm": 0.15364111959934235, "learning_rate": 4.9403773562547813e-05, "loss": 0.8963066339492798, "step": 1743 }, { "epoch": 2.139877300613497, "grad_norm": 0.18686838448047638, "learning_rate": 4.9402683446604593e-05, "loss": 0.8508501648902893, "step": 1744 }, { "epoch": 2.1411042944785277, "grad_norm": 0.21295121312141418, "learning_rate": 4.9401592347058804e-05, "loss": 0.9420541524887085, "step": 1745 }, { "epoch": 2.1423312883435583, "grad_norm": 0.22338958084583282, "learning_rate": 4.940050026395442e-05, "loss": 0.8673956394195557, "step": 1746 }, { "epoch": 2.143558282208589, "grad_norm": 0.18329593539237976, "learning_rate": 4.9399407197335454e-05, "loss": 0.9467500448226929, "step": 1747 }, { "epoch": 2.1447852760736197, "grad_norm": 0.23239915072917938, "learning_rate": 4.939831314724599e-05, "loss": 0.7665014266967773, "step": 1748 }, { "epoch": 2.1460122699386504, "grad_norm": 0.21519051492214203, "learning_rate": 4.93972181137301e-05, "loss": 0.7937390804290771, "step": 1749 }, { "epoch": 2.147239263803681, "grad_norm": 0.1700495481491089, "learning_rate": 4.939612209683193e-05, "loss": 0.8164160251617432, "step": 1750 }, { "epoch": 2.1484662576687117, "grad_norm": 0.1712346076965332, "learning_rate": 4.9395025096595657e-05, "loss": 0.847833514213562, "step": 1751 }, { "epoch": 2.1496932515337424, "grad_norm": 0.1691197156906128, "learning_rate": 4.939392711306551e-05, "loss": 0.9132643938064575, "step": 1752 }, { "epoch": 2.150920245398773, "grad_norm": 0.17052313685417175, "learning_rate": 4.939282814628573e-05, "loss": 0.927170991897583, "step": 1753 }, { "epoch": 2.1521472392638037, "grad_norm": 0.22405380010604858, "learning_rate": 4.939172819630061e-05, "loss": 0.8122763633728027, "step": 1754 }, { "epoch": 2.1533742331288344, "grad_norm": 0.1732129454612732, "learning_rate": 4.93906272631545e-05, "loss": 0.9200667142868042, "step": 1755 }, { "epoch": 2.154601226993865, "grad_norm": 0.1723337471485138, "learning_rate": 4.9389525346891766e-05, "loss": 0.8923888802528381, "step": 1756 }, { "epoch": 2.1558282208588957, "grad_norm": 0.16271232068538666, "learning_rate": 4.938842244755683e-05, "loss": 0.8787403106689453, "step": 1757 }, { "epoch": 2.1570552147239264, "grad_norm": 0.21881242096424103, "learning_rate": 4.938731856519414e-05, "loss": 0.8613224029541016, "step": 1758 }, { "epoch": 2.158282208588957, "grad_norm": 0.1892397105693817, "learning_rate": 4.93862136998482e-05, "loss": 0.8195128440856934, "step": 1759 }, { "epoch": 2.1595092024539877, "grad_norm": 0.24130593240261078, "learning_rate": 4.938510785156353e-05, "loss": 0.6902790069580078, "step": 1760 }, { "epoch": 2.1607361963190184, "grad_norm": 0.18955886363983154, "learning_rate": 4.938400102038472e-05, "loss": 0.9095118045806885, "step": 1761 }, { "epoch": 2.161963190184049, "grad_norm": 0.19030968844890594, "learning_rate": 4.938289320635636e-05, "loss": 0.8595201969146729, "step": 1762 }, { "epoch": 2.1631901840490797, "grad_norm": 0.21820920705795288, "learning_rate": 4.938178440952313e-05, "loss": 0.7560676336288452, "step": 1763 }, { "epoch": 2.1644171779141104, "grad_norm": 0.16867706179618835, "learning_rate": 4.9380674629929704e-05, "loss": 0.7947003841400146, "step": 1764 }, { "epoch": 2.165644171779141, "grad_norm": 0.1761407107114792, "learning_rate": 4.937956386762082e-05, "loss": 0.8725998401641846, "step": 1765 }, { "epoch": 2.1668711656441717, "grad_norm": 0.22902560234069824, "learning_rate": 4.937845212264125e-05, "loss": 0.7523629069328308, "step": 1766 }, { "epoch": 2.1680981595092024, "grad_norm": 0.18528757989406586, "learning_rate": 4.93773393950358e-05, "loss": 0.7581295967102051, "step": 1767 }, { "epoch": 2.169325153374233, "grad_norm": 0.19230039417743683, "learning_rate": 4.937622568484933e-05, "loss": 0.8783042430877686, "step": 1768 }, { "epoch": 2.1705521472392637, "grad_norm": 0.1760653257369995, "learning_rate": 4.9375110992126725e-05, "loss": 0.8123432993888855, "step": 1769 }, { "epoch": 2.1717791411042944, "grad_norm": 0.20742250978946686, "learning_rate": 4.937399531691291e-05, "loss": 0.7356036305427551, "step": 1770 }, { "epoch": 2.173006134969325, "grad_norm": 0.21279025077819824, "learning_rate": 4.9372878659252874e-05, "loss": 0.7599064111709595, "step": 1771 }, { "epoch": 2.1742331288343557, "grad_norm": 0.2948560118675232, "learning_rate": 4.9371761019191605e-05, "loss": 0.6336374282836914, "step": 1772 }, { "epoch": 2.1754601226993864, "grad_norm": 0.19961413741111755, "learning_rate": 4.937064239677416e-05, "loss": 0.9982672929763794, "step": 1773 }, { "epoch": 2.176687116564417, "grad_norm": 0.19578079879283905, "learning_rate": 4.936952279204563e-05, "loss": 1.0044589042663574, "step": 1774 }, { "epoch": 2.1779141104294477, "grad_norm": 0.1741236001253128, "learning_rate": 4.936840220505114e-05, "loss": 0.8997514247894287, "step": 1775 }, { "epoch": 2.1791411042944784, "grad_norm": 0.16453072428703308, "learning_rate": 4.936728063583585e-05, "loss": 0.8939728736877441, "step": 1776 }, { "epoch": 2.180368098159509, "grad_norm": 0.1922045201063156, "learning_rate": 4.936615808444498e-05, "loss": 0.8443198204040527, "step": 1777 }, { "epoch": 2.1815950920245397, "grad_norm": 0.18374919891357422, "learning_rate": 4.9365034550923773e-05, "loss": 0.9671981334686279, "step": 1778 }, { "epoch": 2.1828220858895704, "grad_norm": 0.17852959036827087, "learning_rate": 4.936391003531752e-05, "loss": 1.0389413833618164, "step": 1779 }, { "epoch": 2.184049079754601, "grad_norm": 0.15725937485694885, "learning_rate": 4.936278453767153e-05, "loss": 0.9927434921264648, "step": 1780 }, { "epoch": 2.1852760736196317, "grad_norm": 0.16173319518566132, "learning_rate": 4.936165805803119e-05, "loss": 0.962884247303009, "step": 1781 }, { "epoch": 2.1865030674846624, "grad_norm": 0.18983079493045807, "learning_rate": 4.936053059644189e-05, "loss": 0.9615976810455322, "step": 1782 }, { "epoch": 2.187730061349693, "grad_norm": 0.20197013020515442, "learning_rate": 4.935940215294908e-05, "loss": 0.9084669947624207, "step": 1783 }, { "epoch": 2.1889570552147237, "grad_norm": 0.19031758606433868, "learning_rate": 4.935827272759824e-05, "loss": 0.8898115158081055, "step": 1784 }, { "epoch": 2.190184049079755, "grad_norm": 0.1837746948003769, "learning_rate": 4.935714232043491e-05, "loss": 0.912378191947937, "step": 1785 }, { "epoch": 2.191411042944785, "grad_norm": 0.19466730952262878, "learning_rate": 4.9356010931504637e-05, "loss": 0.970923900604248, "step": 1786 }, { "epoch": 2.192638036809816, "grad_norm": 0.18974843621253967, "learning_rate": 4.9354878560853024e-05, "loss": 0.8839507699012756, "step": 1787 }, { "epoch": 2.1938650306748464, "grad_norm": 0.19656628370285034, "learning_rate": 4.9353745208525727e-05, "loss": 0.8558549880981445, "step": 1788 }, { "epoch": 2.1950920245398775, "grad_norm": 0.17522060871124268, "learning_rate": 4.935261087456841e-05, "loss": 0.8432241082191467, "step": 1789 }, { "epoch": 2.196319018404908, "grad_norm": 0.1755201667547226, "learning_rate": 4.935147555902681e-05, "loss": 1.0228718519210815, "step": 1790 }, { "epoch": 2.197546012269939, "grad_norm": 0.2557326555252075, "learning_rate": 4.935033926194669e-05, "loss": 0.8873909711837769, "step": 1791 }, { "epoch": 2.1987730061349695, "grad_norm": 0.20776693522930145, "learning_rate": 4.9349201983373825e-05, "loss": 0.7904176712036133, "step": 1792 }, { "epoch": 2.2, "grad_norm": 0.21885903179645538, "learning_rate": 4.934806372335409e-05, "loss": 0.8059629201889038, "step": 1793 }, { "epoch": 2.201226993865031, "grad_norm": 0.1781129091978073, "learning_rate": 4.934692448193334e-05, "loss": 0.9598425030708313, "step": 1794 }, { "epoch": 2.2024539877300615, "grad_norm": 0.15753711760044098, "learning_rate": 4.934578425915751e-05, "loss": 0.9085144996643066, "step": 1795 }, { "epoch": 2.203680981595092, "grad_norm": 0.1781756579875946, "learning_rate": 4.934464305507255e-05, "loss": 0.956829309463501, "step": 1796 }, { "epoch": 2.204907975460123, "grad_norm": 0.2096743881702423, "learning_rate": 4.934350086972447e-05, "loss": 0.8532429933547974, "step": 1797 }, { "epoch": 2.2061349693251535, "grad_norm": 0.16606080532073975, "learning_rate": 4.9342357703159285e-05, "loss": 0.9860447645187378, "step": 1798 }, { "epoch": 2.207361963190184, "grad_norm": 0.1842474788427353, "learning_rate": 4.9341213555423095e-05, "loss": 0.9631943702697754, "step": 1799 }, { "epoch": 2.208588957055215, "grad_norm": 0.19112655520439148, "learning_rate": 4.9340068426562016e-05, "loss": 0.974952220916748, "step": 1800 }, { "epoch": 2.2098159509202455, "grad_norm": 0.18010197579860687, "learning_rate": 4.9338922316622186e-05, "loss": 0.7521492838859558, "step": 1801 }, { "epoch": 2.211042944785276, "grad_norm": 0.2185787856578827, "learning_rate": 4.933777522564982e-05, "loss": 0.8370469808578491, "step": 1802 }, { "epoch": 2.212269938650307, "grad_norm": 0.18489889800548553, "learning_rate": 4.933662715369115e-05, "loss": 0.8839148283004761, "step": 1803 }, { "epoch": 2.2134969325153375, "grad_norm": 0.18260003626346588, "learning_rate": 4.933547810079245e-05, "loss": 0.916037917137146, "step": 1804 }, { "epoch": 2.214723926380368, "grad_norm": 0.19236087799072266, "learning_rate": 4.933432806700004e-05, "loss": 0.7710074186325073, "step": 1805 }, { "epoch": 2.215950920245399, "grad_norm": 0.19372797012329102, "learning_rate": 4.933317705236026e-05, "loss": 0.7778733968734741, "step": 1806 }, { "epoch": 2.2171779141104295, "grad_norm": 0.19350777566432953, "learning_rate": 4.9332025056919506e-05, "loss": 0.8768143653869629, "step": 1807 }, { "epoch": 2.21840490797546, "grad_norm": 0.19831211864948273, "learning_rate": 4.9330872080724234e-05, "loss": 1.002855658531189, "step": 1808 }, { "epoch": 2.219631901840491, "grad_norm": 0.1877560019493103, "learning_rate": 4.9329718123820895e-05, "loss": 0.8178799152374268, "step": 1809 }, { "epoch": 2.2208588957055215, "grad_norm": 0.242794930934906, "learning_rate": 4.932856318625602e-05, "loss": 0.7570099830627441, "step": 1810 }, { "epoch": 2.222085889570552, "grad_norm": 0.16927258670330048, "learning_rate": 4.932740726807613e-05, "loss": 0.887635350227356, "step": 1811 }, { "epoch": 2.223312883435583, "grad_norm": 0.17985133826732635, "learning_rate": 4.932625036932785e-05, "loss": 0.8407031893730164, "step": 1812 }, { "epoch": 2.2245398773006135, "grad_norm": 0.19903263449668884, "learning_rate": 4.93250924900578e-05, "loss": 0.8240410089492798, "step": 1813 }, { "epoch": 2.225766871165644, "grad_norm": 0.15591037273406982, "learning_rate": 4.932393363031264e-05, "loss": 0.9913667440414429, "step": 1814 }, { "epoch": 2.226993865030675, "grad_norm": 0.14662232995033264, "learning_rate": 4.93227737901391e-05, "loss": 0.8947621583938599, "step": 1815 }, { "epoch": 2.2282208588957055, "grad_norm": 0.18929153680801392, "learning_rate": 4.9321612969583915e-05, "loss": 1.0244574546813965, "step": 1816 }, { "epoch": 2.229447852760736, "grad_norm": 0.16851113736629486, "learning_rate": 4.932045116869388e-05, "loss": 0.9621454477310181, "step": 1817 }, { "epoch": 2.230674846625767, "grad_norm": 0.19948624074459076, "learning_rate": 4.931928838751582e-05, "loss": 0.9421753883361816, "step": 1818 }, { "epoch": 2.2319018404907975, "grad_norm": 0.2256450355052948, "learning_rate": 4.9318124626096604e-05, "loss": 0.7124794721603394, "step": 1819 }, { "epoch": 2.233128834355828, "grad_norm": 0.16955120861530304, "learning_rate": 4.931695988448314e-05, "loss": 0.9347532987594604, "step": 1820 }, { "epoch": 2.234355828220859, "grad_norm": 0.15607987344264984, "learning_rate": 4.9315794162722385e-05, "loss": 0.9588372707366943, "step": 1821 }, { "epoch": 2.2355828220858895, "grad_norm": 0.21290083229541779, "learning_rate": 4.9314627460861314e-05, "loss": 0.7620859146118164, "step": 1822 }, { "epoch": 2.23680981595092, "grad_norm": 0.23899617791175842, "learning_rate": 4.931345977894696e-05, "loss": 0.7600529193878174, "step": 1823 }, { "epoch": 2.238036809815951, "grad_norm": 0.29988041520118713, "learning_rate": 4.931229111702638e-05, "loss": 0.9021822810173035, "step": 1824 }, { "epoch": 2.2392638036809815, "grad_norm": 0.19205276668071747, "learning_rate": 4.9311121475146696e-05, "loss": 0.8885611891746521, "step": 1825 }, { "epoch": 2.240490797546012, "grad_norm": 0.18131209909915924, "learning_rate": 4.930995085335503e-05, "loss": 0.9161792993545532, "step": 1826 }, { "epoch": 2.241717791411043, "grad_norm": 0.17974436283111572, "learning_rate": 4.930877925169859e-05, "loss": 0.8560193777084351, "step": 1827 }, { "epoch": 2.2429447852760735, "grad_norm": 0.23796716332435608, "learning_rate": 4.930760667022458e-05, "loss": 0.722028911113739, "step": 1828 }, { "epoch": 2.244171779141104, "grad_norm": 0.1858932375907898, "learning_rate": 4.930643310898028e-05, "loss": 0.8086556196212769, "step": 1829 }, { "epoch": 2.245398773006135, "grad_norm": 0.16646450757980347, "learning_rate": 4.930525856801298e-05, "loss": 0.9248192310333252, "step": 1830 }, { "epoch": 2.2466257668711656, "grad_norm": 0.1897035390138626, "learning_rate": 4.9304083047370034e-05, "loss": 0.9065861701965332, "step": 1831 }, { "epoch": 2.247852760736196, "grad_norm": 0.19254611432552338, "learning_rate": 4.930290654709881e-05, "loss": 0.9315174221992493, "step": 1832 }, { "epoch": 2.249079754601227, "grad_norm": 0.19995644688606262, "learning_rate": 4.930172906724675e-05, "loss": 0.9167599678039551, "step": 1833 }, { "epoch": 2.2503067484662576, "grad_norm": 0.23205560445785522, "learning_rate": 4.930055060786129e-05, "loss": 0.7019233703613281, "step": 1834 }, { "epoch": 2.2515337423312882, "grad_norm": 0.18404534459114075, "learning_rate": 4.929937116898995e-05, "loss": 0.9920074939727783, "step": 1835 }, { "epoch": 2.252760736196319, "grad_norm": 0.21558059751987457, "learning_rate": 4.929819075068026e-05, "loss": 0.8028953671455383, "step": 1836 }, { "epoch": 2.2539877300613496, "grad_norm": 0.21013858914375305, "learning_rate": 4.9297009352979795e-05, "loss": 0.8135106563568115, "step": 1837 }, { "epoch": 2.2552147239263802, "grad_norm": 0.224039226770401, "learning_rate": 4.929582697593619e-05, "loss": 0.8847308158874512, "step": 1838 }, { "epoch": 2.256441717791411, "grad_norm": 0.16386879980564117, "learning_rate": 4.929464361959709e-05, "loss": 0.9840419292449951, "step": 1839 }, { "epoch": 2.2576687116564416, "grad_norm": 0.1852705478668213, "learning_rate": 4.929345928401019e-05, "loss": 0.9432888031005859, "step": 1840 }, { "epoch": 2.2588957055214722, "grad_norm": 0.22374227643013, "learning_rate": 4.929227396922324e-05, "loss": 0.7874425649642944, "step": 1841 }, { "epoch": 2.260122699386503, "grad_norm": 0.20756179094314575, "learning_rate": 4.929108767528401e-05, "loss": 0.8274835348129272, "step": 1842 }, { "epoch": 2.2613496932515336, "grad_norm": 0.20150253176689148, "learning_rate": 4.928990040224031e-05, "loss": 0.9313368797302246, "step": 1843 }, { "epoch": 2.2625766871165642, "grad_norm": 0.20420697331428528, "learning_rate": 4.928871215014001e-05, "loss": 0.8066107630729675, "step": 1844 }, { "epoch": 2.263803680981595, "grad_norm": 0.22802172601222992, "learning_rate": 4.928752291903099e-05, "loss": 0.7472622990608215, "step": 1845 }, { "epoch": 2.265030674846626, "grad_norm": 0.2356645166873932, "learning_rate": 4.92863327089612e-05, "loss": 0.7347790598869324, "step": 1846 }, { "epoch": 2.2662576687116562, "grad_norm": 0.225774884223938, "learning_rate": 4.92851415199786e-05, "loss": 0.8008434772491455, "step": 1847 }, { "epoch": 2.2674846625766873, "grad_norm": 0.1768401712179184, "learning_rate": 4.928394935213121e-05, "loss": 0.9524754285812378, "step": 1848 }, { "epoch": 2.2687116564417176, "grad_norm": 0.16219457983970642, "learning_rate": 4.9282756205467074e-05, "loss": 0.9215080738067627, "step": 1849 }, { "epoch": 2.2699386503067487, "grad_norm": 0.34127306938171387, "learning_rate": 4.92815620800343e-05, "loss": 0.7857099771499634, "step": 1850 }, { "epoch": 2.271165644171779, "grad_norm": 0.17118456959724426, "learning_rate": 4.928036697588101e-05, "loss": 0.964532732963562, "step": 1851 }, { "epoch": 2.27239263803681, "grad_norm": 0.2255508005619049, "learning_rate": 4.9279170893055377e-05, "loss": 0.9118237495422363, "step": 1852 }, { "epoch": 2.2736196319018402, "grad_norm": 0.24723391234874725, "learning_rate": 4.927797383160561e-05, "loss": 0.7816696166992188, "step": 1853 }, { "epoch": 2.2748466257668714, "grad_norm": 0.1722937971353531, "learning_rate": 4.9276775791579964e-05, "loss": 0.8741923570632935, "step": 1854 }, { "epoch": 2.276073619631902, "grad_norm": 0.21721065044403076, "learning_rate": 4.9275576773026724e-05, "loss": 0.9696674942970276, "step": 1855 }, { "epoch": 2.2773006134969327, "grad_norm": 0.18851076066493988, "learning_rate": 4.9274376775994216e-05, "loss": 0.9807695746421814, "step": 1856 }, { "epoch": 2.2785276073619634, "grad_norm": 0.17801180481910706, "learning_rate": 4.9273175800530824e-05, "loss": 0.9655360579490662, "step": 1857 }, { "epoch": 2.279754601226994, "grad_norm": 0.19140595197677612, "learning_rate": 4.927197384668494e-05, "loss": 0.9639184474945068, "step": 1858 }, { "epoch": 2.2809815950920247, "grad_norm": 0.1962619572877884, "learning_rate": 4.9270770914505004e-05, "loss": 0.7377644181251526, "step": 1859 }, { "epoch": 2.2822085889570554, "grad_norm": 0.23510316014289856, "learning_rate": 4.926956700403953e-05, "loss": 0.7568235397338867, "step": 1860 }, { "epoch": 2.283435582822086, "grad_norm": 0.19664251804351807, "learning_rate": 4.926836211533702e-05, "loss": 0.7688631415367126, "step": 1861 }, { "epoch": 2.2846625766871167, "grad_norm": 0.18970844149589539, "learning_rate": 4.9267156248446054e-05, "loss": 0.932304859161377, "step": 1862 }, { "epoch": 2.2858895705521474, "grad_norm": 0.1769510805606842, "learning_rate": 4.926594940341523e-05, "loss": 0.9190236330032349, "step": 1863 }, { "epoch": 2.287116564417178, "grad_norm": 0.19411204755306244, "learning_rate": 4.9264741580293194e-05, "loss": 0.9321019649505615, "step": 1864 }, { "epoch": 2.2883435582822087, "grad_norm": 0.18319718539714813, "learning_rate": 4.926353277912863e-05, "loss": 0.8462913036346436, "step": 1865 }, { "epoch": 2.2895705521472394, "grad_norm": 0.21945218741893768, "learning_rate": 4.926232299997026e-05, "loss": 0.7781081199645996, "step": 1866 }, { "epoch": 2.29079754601227, "grad_norm": 0.16995689272880554, "learning_rate": 4.926111224286685e-05, "loss": 0.9130319952964783, "step": 1867 }, { "epoch": 2.2920245398773007, "grad_norm": 0.18057040870189667, "learning_rate": 4.92599005078672e-05, "loss": 0.959852933883667, "step": 1868 }, { "epoch": 2.2932515337423314, "grad_norm": 0.18874768912792206, "learning_rate": 4.925868779502015e-05, "loss": 0.9335765838623047, "step": 1869 }, { "epoch": 2.294478527607362, "grad_norm": 0.18561770021915436, "learning_rate": 4.925747410437459e-05, "loss": 0.9250808954238892, "step": 1870 }, { "epoch": 2.2957055214723927, "grad_norm": 0.19985412061214447, "learning_rate": 4.925625943597943e-05, "loss": 0.8944474458694458, "step": 1871 }, { "epoch": 2.2969325153374234, "grad_norm": 0.24039649963378906, "learning_rate": 4.925504378988363e-05, "loss": 0.676800549030304, "step": 1872 }, { "epoch": 2.298159509202454, "grad_norm": 0.22845302522182465, "learning_rate": 4.925382716613619e-05, "loss": 0.7857319116592407, "step": 1873 }, { "epoch": 2.2993865030674847, "grad_norm": 0.22402508556842804, "learning_rate": 4.9252609564786155e-05, "loss": 0.8498733639717102, "step": 1874 }, { "epoch": 2.3006134969325154, "grad_norm": 0.190170556306839, "learning_rate": 4.9251390985882606e-05, "loss": 0.9099641442298889, "step": 1875 }, { "epoch": 2.301840490797546, "grad_norm": 0.2094406932592392, "learning_rate": 4.9250171429474645e-05, "loss": 0.7298871278762817, "step": 1876 }, { "epoch": 2.3030674846625767, "grad_norm": 0.1890544593334198, "learning_rate": 4.924895089561144e-05, "loss": 1.0397744178771973, "step": 1877 }, { "epoch": 2.3042944785276074, "grad_norm": 0.17235994338989258, "learning_rate": 4.924772938434218e-05, "loss": 0.9150694608688354, "step": 1878 }, { "epoch": 2.305521472392638, "grad_norm": 0.18178194761276245, "learning_rate": 4.924650689571611e-05, "loss": 1.0381306409835815, "step": 1879 }, { "epoch": 2.3067484662576687, "grad_norm": 0.1774953156709671, "learning_rate": 4.92452834297825e-05, "loss": 0.9703125953674316, "step": 1880 }, { "epoch": 2.3079754601226994, "grad_norm": 0.17260119318962097, "learning_rate": 4.9244058986590655e-05, "loss": 0.9285784959793091, "step": 1881 }, { "epoch": 2.30920245398773, "grad_norm": 0.22181569039821625, "learning_rate": 4.924283356618995e-05, "loss": 0.7870676517486572, "step": 1882 }, { "epoch": 2.3104294478527607, "grad_norm": 0.21772918105125427, "learning_rate": 4.9241607168629764e-05, "loss": 0.8266769051551819, "step": 1883 }, { "epoch": 2.3116564417177914, "grad_norm": 0.18922026455402374, "learning_rate": 4.9240379793959536e-05, "loss": 0.9127379655838013, "step": 1884 }, { "epoch": 2.312883435582822, "grad_norm": 0.16174767911434174, "learning_rate": 4.9239151442228726e-05, "loss": 0.7656863927841187, "step": 1885 }, { "epoch": 2.3141104294478527, "grad_norm": 0.17659829556941986, "learning_rate": 4.923792211348686e-05, "loss": 0.9127383828163147, "step": 1886 }, { "epoch": 2.3153374233128834, "grad_norm": 0.2115607112646103, "learning_rate": 4.923669180778348e-05, "loss": 0.9442738890647888, "step": 1887 }, { "epoch": 2.316564417177914, "grad_norm": 0.18581388890743256, "learning_rate": 4.9235460525168176e-05, "loss": 0.9427568912506104, "step": 1888 }, { "epoch": 2.3177914110429447, "grad_norm": 0.1921963095664978, "learning_rate": 4.923422826569059e-05, "loss": 0.7933046817779541, "step": 1889 }, { "epoch": 2.3190184049079754, "grad_norm": 0.16583235561847687, "learning_rate": 4.923299502940037e-05, "loss": 0.902498722076416, "step": 1890 }, { "epoch": 2.320245398773006, "grad_norm": 0.23133830726146698, "learning_rate": 4.9231760816347236e-05, "loss": 0.576991081237793, "step": 1891 }, { "epoch": 2.3214723926380367, "grad_norm": 0.2203063815832138, "learning_rate": 4.923052562658094e-05, "loss": 0.720115602016449, "step": 1892 }, { "epoch": 2.3226993865030674, "grad_norm": 0.19653744995594025, "learning_rate": 4.922928946015126e-05, "loss": 0.9069023132324219, "step": 1893 }, { "epoch": 2.323926380368098, "grad_norm": 0.21436256170272827, "learning_rate": 4.9228052317108027e-05, "loss": 1.0010062456130981, "step": 1894 }, { "epoch": 2.3251533742331287, "grad_norm": 0.226000115275383, "learning_rate": 4.9226814197501106e-05, "loss": 0.8498183488845825, "step": 1895 }, { "epoch": 2.3263803680981594, "grad_norm": 0.24405263364315033, "learning_rate": 4.9225575101380394e-05, "loss": 0.8133258819580078, "step": 1896 }, { "epoch": 2.32760736196319, "grad_norm": 0.19951172173023224, "learning_rate": 4.922433502879585e-05, "loss": 0.850066065788269, "step": 1897 }, { "epoch": 2.3288343558282207, "grad_norm": 0.2223375290632248, "learning_rate": 4.9223093979797454e-05, "loss": 0.8764156699180603, "step": 1898 }, { "epoch": 2.3300613496932514, "grad_norm": 0.1925933063030243, "learning_rate": 4.922185195443523e-05, "loss": 0.9663676023483276, "step": 1899 }, { "epoch": 2.331288343558282, "grad_norm": 0.21614868938922882, "learning_rate": 4.922060895275922e-05, "loss": 0.8339564800262451, "step": 1900 }, { "epoch": 2.3325153374233127, "grad_norm": 0.19130681455135345, "learning_rate": 4.9219364974819556e-05, "loss": 0.9465000033378601, "step": 1901 }, { "epoch": 2.3337423312883434, "grad_norm": 0.20960555970668793, "learning_rate": 4.921812002066636e-05, "loss": 0.7464608550071716, "step": 1902 }, { "epoch": 2.334969325153374, "grad_norm": 0.1772882640361786, "learning_rate": 4.921687409034982e-05, "loss": 0.7951661348342896, "step": 1903 }, { "epoch": 2.3361963190184047, "grad_norm": 0.21718928217887878, "learning_rate": 4.921562718392016e-05, "loss": 0.68408203125, "step": 1904 }, { "epoch": 2.3374233128834354, "grad_norm": 0.25412771105766296, "learning_rate": 4.921437930142763e-05, "loss": 0.9037344455718994, "step": 1905 }, { "epoch": 2.338650306748466, "grad_norm": 0.19177347421646118, "learning_rate": 4.921313044292252e-05, "loss": 0.898505449295044, "step": 1906 }, { "epoch": 2.3398773006134967, "grad_norm": 0.1938369870185852, "learning_rate": 4.921188060845519e-05, "loss": 0.7579550743103027, "step": 1907 }, { "epoch": 2.3411042944785274, "grad_norm": 0.1922505497932434, "learning_rate": 4.921062979807601e-05, "loss": 1.0056673288345337, "step": 1908 }, { "epoch": 2.3423312883435585, "grad_norm": 0.19044020771980286, "learning_rate": 4.920937801183539e-05, "loss": 0.830829918384552, "step": 1909 }, { "epoch": 2.3435582822085887, "grad_norm": 0.1911969780921936, "learning_rate": 4.9208125249783784e-05, "loss": 0.8550224304199219, "step": 1910 }, { "epoch": 2.34478527607362, "grad_norm": 0.18866202235221863, "learning_rate": 4.92068715119717e-05, "loss": 0.8738923668861389, "step": 1911 }, { "epoch": 2.34601226993865, "grad_norm": 0.20403389632701874, "learning_rate": 4.9205616798449664e-05, "loss": 0.7840300798416138, "step": 1912 }, { "epoch": 2.347239263803681, "grad_norm": 0.19510459899902344, "learning_rate": 4.920436110926825e-05, "loss": 0.8535134792327881, "step": 1913 }, { "epoch": 2.3484662576687114, "grad_norm": 0.17614853382110596, "learning_rate": 4.920310444447807e-05, "loss": 1.0161106586456299, "step": 1914 }, { "epoch": 2.3496932515337425, "grad_norm": 0.2467978298664093, "learning_rate": 4.920184680412979e-05, "loss": 0.7960246801376343, "step": 1915 }, { "epoch": 2.3509202453987728, "grad_norm": 0.2871304452419281, "learning_rate": 4.920058818827409e-05, "loss": 0.6945743560791016, "step": 1916 }, { "epoch": 2.352147239263804, "grad_norm": 0.23796719312667847, "learning_rate": 4.9199328596961694e-05, "loss": 0.6943196058273315, "step": 1917 }, { "epoch": 2.3533742331288345, "grad_norm": 0.20387674868106842, "learning_rate": 4.9198068030243384e-05, "loss": 0.808634877204895, "step": 1918 }, { "epoch": 2.354601226993865, "grad_norm": 0.17875508964061737, "learning_rate": 4.919680648816997e-05, "loss": 0.8281752467155457, "step": 1919 }, { "epoch": 2.355828220858896, "grad_norm": 0.17723335325717926, "learning_rate": 4.919554397079229e-05, "loss": 0.8621963858604431, "step": 1920 }, { "epoch": 2.3570552147239265, "grad_norm": 0.20585528016090393, "learning_rate": 4.919428047816125e-05, "loss": 0.860049843788147, "step": 1921 }, { "epoch": 2.358282208588957, "grad_norm": 0.20197385549545288, "learning_rate": 4.9193016010327756e-05, "loss": 0.9273146986961365, "step": 1922 }, { "epoch": 2.359509202453988, "grad_norm": 0.19357463717460632, "learning_rate": 4.91917505673428e-05, "loss": 0.991608738899231, "step": 1923 }, { "epoch": 2.3607361963190185, "grad_norm": 0.18257738649845123, "learning_rate": 4.919048414925737e-05, "loss": 0.7943077087402344, "step": 1924 }, { "epoch": 2.361963190184049, "grad_norm": 0.20809446275234222, "learning_rate": 4.918921675612251e-05, "loss": 0.8841148018836975, "step": 1925 }, { "epoch": 2.36319018404908, "grad_norm": 0.19429004192352295, "learning_rate": 4.9187948387989325e-05, "loss": 0.8543071150779724, "step": 1926 }, { "epoch": 2.3644171779141105, "grad_norm": 0.1870945245027542, "learning_rate": 4.9186679044908925e-05, "loss": 0.8431538343429565, "step": 1927 }, { "epoch": 2.365644171779141, "grad_norm": 0.19101722538471222, "learning_rate": 4.918540872693247e-05, "loss": 0.9216923713684082, "step": 1928 }, { "epoch": 2.366871165644172, "grad_norm": 0.18837648630142212, "learning_rate": 4.9184137434111165e-05, "loss": 0.9095999598503113, "step": 1929 }, { "epoch": 2.3680981595092025, "grad_norm": 0.18765152990818024, "learning_rate": 4.918286516649626e-05, "loss": 0.9365110993385315, "step": 1930 }, { "epoch": 2.369325153374233, "grad_norm": 0.18411822617053986, "learning_rate": 4.918159192413904e-05, "loss": 0.7959738969802856, "step": 1931 }, { "epoch": 2.370552147239264, "grad_norm": 0.20098260045051575, "learning_rate": 4.9180317707090806e-05, "loss": 0.8544799089431763, "step": 1932 }, { "epoch": 2.3717791411042946, "grad_norm": 0.21406050026416779, "learning_rate": 4.9179042515402926e-05, "loss": 0.89927077293396, "step": 1933 }, { "epoch": 2.373006134969325, "grad_norm": 0.18690188229084015, "learning_rate": 4.917776634912682e-05, "loss": 0.8090450763702393, "step": 1934 }, { "epoch": 2.374233128834356, "grad_norm": 0.24028198421001434, "learning_rate": 4.91764892083139e-05, "loss": 0.8372797966003418, "step": 1935 }, { "epoch": 2.3754601226993866, "grad_norm": 0.17828646302223206, "learning_rate": 4.917521109301565e-05, "loss": 0.7367762327194214, "step": 1936 }, { "epoch": 2.3766871165644172, "grad_norm": 0.21606691181659698, "learning_rate": 4.9173932003283596e-05, "loss": 0.8412348031997681, "step": 1937 }, { "epoch": 2.377914110429448, "grad_norm": 0.1723213940858841, "learning_rate": 4.9172651939169285e-05, "loss": 0.9091533422470093, "step": 1938 }, { "epoch": 2.3791411042944786, "grad_norm": 0.20434415340423584, "learning_rate": 4.9171370900724326e-05, "loss": 0.9625608921051025, "step": 1939 }, { "epoch": 2.3803680981595092, "grad_norm": 0.1921880543231964, "learning_rate": 4.917008888800033e-05, "loss": 0.8219057321548462, "step": 1940 }, { "epoch": 2.38159509202454, "grad_norm": 0.20980283617973328, "learning_rate": 4.9168805901049e-05, "loss": 0.828074038028717, "step": 1941 }, { "epoch": 2.3828220858895706, "grad_norm": 0.1955694854259491, "learning_rate": 4.9167521939922027e-05, "loss": 0.861845850944519, "step": 1942 }, { "epoch": 2.3840490797546012, "grad_norm": 0.17613179981708527, "learning_rate": 4.916623700467118e-05, "loss": 0.953903317451477, "step": 1943 }, { "epoch": 2.385276073619632, "grad_norm": 0.24513962864875793, "learning_rate": 4.9164951095348236e-05, "loss": 0.6911848783493042, "step": 1944 }, { "epoch": 2.3865030674846626, "grad_norm": 0.16384369134902954, "learning_rate": 4.9163664212005035e-05, "loss": 0.9371635913848877, "step": 1945 }, { "epoch": 2.3877300613496932, "grad_norm": 0.1850859522819519, "learning_rate": 4.916237635469345e-05, "loss": 0.9673163890838623, "step": 1946 }, { "epoch": 2.388957055214724, "grad_norm": 0.19766677916049957, "learning_rate": 4.916108752346539e-05, "loss": 0.8005291223526001, "step": 1947 }, { "epoch": 2.3901840490797546, "grad_norm": 0.18942908942699432, "learning_rate": 4.915979771837279e-05, "loss": 0.9108432531356812, "step": 1948 }, { "epoch": 2.3914110429447852, "grad_norm": 0.19291125237941742, "learning_rate": 4.9158506939467664e-05, "loss": 0.9143249988555908, "step": 1949 }, { "epoch": 2.392638036809816, "grad_norm": 0.17428578436374664, "learning_rate": 4.9157215186802016e-05, "loss": 0.943490207195282, "step": 1950 }, { "epoch": 2.3938650306748466, "grad_norm": 0.23102442920207977, "learning_rate": 4.915592246042792e-05, "loss": 0.7408949136734009, "step": 1951 }, { "epoch": 2.3950920245398772, "grad_norm": 0.21598093211650848, "learning_rate": 4.915462876039749e-05, "loss": 0.9317041039466858, "step": 1952 }, { "epoch": 2.396319018404908, "grad_norm": 0.24407252669334412, "learning_rate": 4.915333408676287e-05, "loss": 0.8837110996246338, "step": 1953 }, { "epoch": 2.3975460122699386, "grad_norm": 0.17744700610637665, "learning_rate": 4.915203843957623e-05, "loss": 0.8905078768730164, "step": 1954 }, { "epoch": 2.3987730061349692, "grad_norm": 0.23543564975261688, "learning_rate": 4.915074181888981e-05, "loss": 0.7823557257652283, "step": 1955 }, { "epoch": 2.4, "grad_norm": 0.15634159743785858, "learning_rate": 4.914944422475587e-05, "loss": 0.9453868865966797, "step": 1956 }, { "epoch": 2.4012269938650306, "grad_norm": 0.23540671169757843, "learning_rate": 4.914814565722671e-05, "loss": 0.7590214014053345, "step": 1957 }, { "epoch": 2.4024539877300612, "grad_norm": 0.19982020556926727, "learning_rate": 4.914684611635467e-05, "loss": 0.7630822658538818, "step": 1958 }, { "epoch": 2.403680981595092, "grad_norm": 0.25528112053871155, "learning_rate": 4.9145545602192126e-05, "loss": 0.6345767974853516, "step": 1959 }, { "epoch": 2.4049079754601226, "grad_norm": 0.21464598178863525, "learning_rate": 4.914424411479151e-05, "loss": 0.7764972448348999, "step": 1960 }, { "epoch": 2.4061349693251532, "grad_norm": 0.17037636041641235, "learning_rate": 4.914294165420528e-05, "loss": 0.9748914241790771, "step": 1961 }, { "epoch": 2.407361963190184, "grad_norm": 0.2691347897052765, "learning_rate": 4.9141638220485916e-05, "loss": 0.6330305337905884, "step": 1962 }, { "epoch": 2.4085889570552146, "grad_norm": 0.19634249806404114, "learning_rate": 4.9140333813685976e-05, "loss": 1.0071499347686768, "step": 1963 }, { "epoch": 2.4098159509202453, "grad_norm": 0.22637632489204407, "learning_rate": 4.9139028433858036e-05, "loss": 0.7479587197303772, "step": 1964 }, { "epoch": 2.411042944785276, "grad_norm": 0.21445097029209137, "learning_rate": 4.9137722081054706e-05, "loss": 0.7501163482666016, "step": 1965 }, { "epoch": 2.4122699386503066, "grad_norm": 0.19295087456703186, "learning_rate": 4.913641475532863e-05, "loss": 0.9532305598258972, "step": 1966 }, { "epoch": 2.4134969325153373, "grad_norm": 0.1939060091972351, "learning_rate": 4.9135106456732525e-05, "loss": 0.7231448888778687, "step": 1967 }, { "epoch": 2.414723926380368, "grad_norm": 0.2439105063676834, "learning_rate": 4.9133797185319105e-05, "loss": 0.7507177591323853, "step": 1968 }, { "epoch": 2.4159509202453986, "grad_norm": 0.232352152466774, "learning_rate": 4.9132486941141167e-05, "loss": 0.830825686454773, "step": 1969 }, { "epoch": 2.4171779141104293, "grad_norm": 0.1757306605577469, "learning_rate": 4.9131175724251496e-05, "loss": 0.9088940024375916, "step": 1970 }, { "epoch": 2.41840490797546, "grad_norm": 0.18483299016952515, "learning_rate": 4.9129863534702956e-05, "loss": 0.8755274415016174, "step": 1971 }, { "epoch": 2.419631901840491, "grad_norm": 0.20883983373641968, "learning_rate": 4.912855037254844e-05, "loss": 0.8367748260498047, "step": 1972 }, { "epoch": 2.4208588957055213, "grad_norm": 0.231523796916008, "learning_rate": 4.912723623784088e-05, "loss": 0.806782603263855, "step": 1973 }, { "epoch": 2.4220858895705524, "grad_norm": 0.18896178901195526, "learning_rate": 4.912592113063324e-05, "loss": 0.8514517545700073, "step": 1974 }, { "epoch": 2.4233128834355826, "grad_norm": 0.19437314569950104, "learning_rate": 4.912460505097852e-05, "loss": 0.8399753570556641, "step": 1975 }, { "epoch": 2.4245398773006137, "grad_norm": 0.20940300822257996, "learning_rate": 4.912328799892978e-05, "loss": 0.7897269129753113, "step": 1976 }, { "epoch": 2.425766871165644, "grad_norm": 0.198567196726799, "learning_rate": 4.912196997454011e-05, "loss": 0.701506495475769, "step": 1977 }, { "epoch": 2.426993865030675, "grad_norm": 0.19547158479690552, "learning_rate": 4.912065097786262e-05, "loss": 0.8819347620010376, "step": 1978 }, { "epoch": 2.4282208588957057, "grad_norm": 0.1909191757440567, "learning_rate": 4.911933100895049e-05, "loss": 0.7303451299667358, "step": 1979 }, { "epoch": 2.4294478527607364, "grad_norm": 0.23155681788921356, "learning_rate": 4.9118010067856914e-05, "loss": 0.8523619174957275, "step": 1980 }, { "epoch": 2.430674846625767, "grad_norm": 0.17148393392562866, "learning_rate": 4.911668815463513e-05, "loss": 1.0272530317306519, "step": 1981 }, { "epoch": 2.4319018404907977, "grad_norm": 0.21312780678272247, "learning_rate": 4.9115365269338444e-05, "loss": 0.83933424949646, "step": 1982 }, { "epoch": 2.4331288343558284, "grad_norm": 0.18536916375160217, "learning_rate": 4.911404141202015e-05, "loss": 0.8946059942245483, "step": 1983 }, { "epoch": 2.434355828220859, "grad_norm": 0.18383224308490753, "learning_rate": 4.911271658273363e-05, "loss": 0.9432823061943054, "step": 1984 }, { "epoch": 2.4355828220858897, "grad_norm": 0.2067876160144806, "learning_rate": 4.9111390781532274e-05, "loss": 0.78709876537323, "step": 1985 }, { "epoch": 2.4368098159509204, "grad_norm": 0.19304285943508148, "learning_rate": 4.911006400846953e-05, "loss": 0.9401748776435852, "step": 1986 }, { "epoch": 2.438036809815951, "grad_norm": 0.15860728919506073, "learning_rate": 4.910873626359886e-05, "loss": 0.9083156585693359, "step": 1987 }, { "epoch": 2.4392638036809817, "grad_norm": 0.23211434483528137, "learning_rate": 4.910740754697379e-05, "loss": 0.7952883243560791, "step": 1988 }, { "epoch": 2.4404907975460124, "grad_norm": 0.16340957581996918, "learning_rate": 4.9106077858647894e-05, "loss": 0.9502370357513428, "step": 1989 }, { "epoch": 2.441717791411043, "grad_norm": 0.201094850897789, "learning_rate": 4.910474719867474e-05, "loss": 0.8202266097068787, "step": 1990 }, { "epoch": 2.4429447852760737, "grad_norm": 0.2059275209903717, "learning_rate": 4.9103415567107975e-05, "loss": 0.8579673767089844, "step": 1991 }, { "epoch": 2.4441717791411044, "grad_norm": 0.35440292954444885, "learning_rate": 4.9102082964001275e-05, "loss": 0.9247883558273315, "step": 1992 }, { "epoch": 2.445398773006135, "grad_norm": 0.16032905876636505, "learning_rate": 4.910074938940835e-05, "loss": 1.0275849103927612, "step": 1993 }, { "epoch": 2.4466257668711657, "grad_norm": 0.19123047590255737, "learning_rate": 4.9099414843382965e-05, "loss": 0.8779357671737671, "step": 1994 }, { "epoch": 2.4478527607361964, "grad_norm": 0.19159722328186035, "learning_rate": 4.909807932597889e-05, "loss": 0.87266606092453, "step": 1995 }, { "epoch": 2.449079754601227, "grad_norm": 0.20264165103435516, "learning_rate": 4.909674283724997e-05, "loss": 0.9558857679367065, "step": 1996 }, { "epoch": 2.4503067484662577, "grad_norm": 0.17167049646377563, "learning_rate": 4.909540537725007e-05, "loss": 0.9014919996261597, "step": 1997 }, { "epoch": 2.4515337423312884, "grad_norm": 0.20240791141986847, "learning_rate": 4.909406694603311e-05, "loss": 0.9929538369178772, "step": 1998 }, { "epoch": 2.452760736196319, "grad_norm": 0.18607108294963837, "learning_rate": 4.909272754365302e-05, "loss": 0.8565428853034973, "step": 1999 }, { "epoch": 2.4539877300613497, "grad_norm": 0.1630648821592331, "learning_rate": 4.90913871701638e-05, "loss": 0.9177169799804688, "step": 2000 }, { "epoch": 2.4552147239263804, "grad_norm": 0.1812446266412735, "learning_rate": 4.909004582561948e-05, "loss": 1.0479278564453125, "step": 2001 }, { "epoch": 2.456441717791411, "grad_norm": 0.20889578759670258, "learning_rate": 4.908870351007412e-05, "loss": 1.1795518398284912, "step": 2002 }, { "epoch": 2.4576687116564417, "grad_norm": 0.2074962705373764, "learning_rate": 4.908736022358181e-05, "loss": 0.856662392616272, "step": 2003 }, { "epoch": 2.4588957055214724, "grad_norm": 0.21785615384578705, "learning_rate": 4.9086015966196725e-05, "loss": 0.8356398940086365, "step": 2004 }, { "epoch": 2.460122699386503, "grad_norm": 0.21884481608867645, "learning_rate": 4.908467073797303e-05, "loss": 0.7772617936134338, "step": 2005 }, { "epoch": 2.4613496932515337, "grad_norm": 0.2379622459411621, "learning_rate": 4.9083324538964934e-05, "loss": 0.8538402318954468, "step": 2006 }, { "epoch": 2.4625766871165644, "grad_norm": 0.22171859443187714, "learning_rate": 4.908197736922673e-05, "loss": 0.8446176052093506, "step": 2007 }, { "epoch": 2.463803680981595, "grad_norm": 0.20735269784927368, "learning_rate": 4.9080629228812695e-05, "loss": 0.974732518196106, "step": 2008 }, { "epoch": 2.4650306748466257, "grad_norm": 0.1952991634607315, "learning_rate": 4.907928011777718e-05, "loss": 0.7410350441932678, "step": 2009 }, { "epoch": 2.4662576687116564, "grad_norm": 0.2063770890235901, "learning_rate": 4.9077930036174556e-05, "loss": 0.8905318379402161, "step": 2010 }, { "epoch": 2.467484662576687, "grad_norm": 0.22824865579605103, "learning_rate": 4.9076578984059246e-05, "loss": 0.631864607334137, "step": 2011 }, { "epoch": 2.4687116564417177, "grad_norm": 0.20615680515766144, "learning_rate": 4.907522696148571e-05, "loss": 0.7088950276374817, "step": 2012 }, { "epoch": 2.4699386503067484, "grad_norm": 0.14130038022994995, "learning_rate": 4.907387396850843e-05, "loss": 0.9654497504234314, "step": 2013 }, { "epoch": 2.471165644171779, "grad_norm": 0.19823503494262695, "learning_rate": 4.907252000518196e-05, "loss": 0.8114274740219116, "step": 2014 }, { "epoch": 2.4723926380368098, "grad_norm": 0.19376002252101898, "learning_rate": 4.907116507156086e-05, "loss": 0.8214151263237, "step": 2015 }, { "epoch": 2.4736196319018404, "grad_norm": 0.19816547632217407, "learning_rate": 4.906980916769976e-05, "loss": 0.853097677230835, "step": 2016 }, { "epoch": 2.474846625766871, "grad_norm": 0.24468953907489777, "learning_rate": 4.906845229365329e-05, "loss": 0.8803714513778687, "step": 2017 }, { "epoch": 2.4760736196319018, "grad_norm": 0.20167836546897888, "learning_rate": 4.906709444947615e-05, "loss": 0.7904136180877686, "step": 2018 }, { "epoch": 2.4773006134969324, "grad_norm": 0.22585193812847137, "learning_rate": 4.9065735635223085e-05, "loss": 0.6925946474075317, "step": 2019 }, { "epoch": 2.478527607361963, "grad_norm": 0.19210585951805115, "learning_rate": 4.906437585094885e-05, "loss": 0.8566412925720215, "step": 2020 }, { "epoch": 2.4797546012269938, "grad_norm": 0.27468928694725037, "learning_rate": 4.9063015096708254e-05, "loss": 0.7502186298370361, "step": 2021 }, { "epoch": 2.4809815950920244, "grad_norm": 0.2651205360889435, "learning_rate": 4.906165337255616e-05, "loss": 0.8550838232040405, "step": 2022 }, { "epoch": 2.482208588957055, "grad_norm": 0.19840244948863983, "learning_rate": 4.9060290678547436e-05, "loss": 0.859994649887085, "step": 2023 }, { "epoch": 2.4834355828220858, "grad_norm": 0.1971668303012848, "learning_rate": 4.905892701473702e-05, "loss": 0.8867738246917725, "step": 2024 }, { "epoch": 2.4846625766871164, "grad_norm": 0.167233407497406, "learning_rate": 4.905756238117987e-05, "loss": 1.047363519668579, "step": 2025 }, { "epoch": 2.485889570552147, "grad_norm": 0.19808493554592133, "learning_rate": 4.9056196777931e-05, "loss": 0.8262235522270203, "step": 2026 }, { "epoch": 2.4871165644171778, "grad_norm": 0.19524429738521576, "learning_rate": 4.9054830205045445e-05, "loss": 0.7246056199073792, "step": 2027 }, { "epoch": 2.4883435582822084, "grad_norm": 0.2127412110567093, "learning_rate": 4.905346266257829e-05, "loss": 0.8139204978942871, "step": 2028 }, { "epoch": 2.489570552147239, "grad_norm": 0.20479926466941833, "learning_rate": 4.905209415058466e-05, "loss": 0.7533786296844482, "step": 2029 }, { "epoch": 2.4907975460122698, "grad_norm": 0.205179825425148, "learning_rate": 4.9050724669119714e-05, "loss": 0.7978121638298035, "step": 2030 }, { "epoch": 2.4920245398773004, "grad_norm": 0.22017084062099457, "learning_rate": 4.9049354218238655e-05, "loss": 0.8000261783599854, "step": 2031 }, { "epoch": 2.493251533742331, "grad_norm": 0.2293592393398285, "learning_rate": 4.904798279799671e-05, "loss": 1.1175532341003418, "step": 2032 }, { "epoch": 2.4944785276073618, "grad_norm": 0.19989097118377686, "learning_rate": 4.904661040844917e-05, "loss": 0.9531018733978271, "step": 2033 }, { "epoch": 2.4957055214723924, "grad_norm": 0.20079457759857178, "learning_rate": 4.904523704965135e-05, "loss": 0.8793597221374512, "step": 2034 }, { "epoch": 2.4969325153374236, "grad_norm": 0.19500964879989624, "learning_rate": 4.9043862721658596e-05, "loss": 0.8950158357620239, "step": 2035 }, { "epoch": 2.4981595092024538, "grad_norm": 0.17722803354263306, "learning_rate": 4.904248742452632e-05, "loss": 0.9225175380706787, "step": 2036 }, { "epoch": 2.499386503067485, "grad_norm": 0.2071290761232376, "learning_rate": 4.904111115830994e-05, "loss": 0.9747220277786255, "step": 2037 }, { "epoch": 2.500613496932515, "grad_norm": 0.2114502638578415, "learning_rate": 4.903973392306494e-05, "loss": 0.8642747402191162, "step": 2038 }, { "epoch": 2.5018404907975462, "grad_norm": 0.2117314636707306, "learning_rate": 4.903835571884683e-05, "loss": 0.8067140579223633, "step": 2039 }, { "epoch": 2.5030674846625764, "grad_norm": 0.18271629512310028, "learning_rate": 4.9036976545711153e-05, "loss": 1.055950403213501, "step": 2040 }, { "epoch": 2.5042944785276076, "grad_norm": 0.18060241639614105, "learning_rate": 4.903559640371351e-05, "loss": 0.7302552461624146, "step": 2041 }, { "epoch": 2.505521472392638, "grad_norm": 0.18381844460964203, "learning_rate": 4.9034215292909536e-05, "loss": 0.912084698677063, "step": 2042 }, { "epoch": 2.506748466257669, "grad_norm": 0.2089994251728058, "learning_rate": 4.9032833213354887e-05, "loss": 1.0708715915679932, "step": 2043 }, { "epoch": 2.507975460122699, "grad_norm": 0.20866659283638, "learning_rate": 4.903145016510528e-05, "loss": 0.7860921621322632, "step": 2044 }, { "epoch": 2.5092024539877302, "grad_norm": 0.1983002871274948, "learning_rate": 4.9030066148216445e-05, "loss": 0.8495457172393799, "step": 2045 }, { "epoch": 2.510429447852761, "grad_norm": 0.22862477600574493, "learning_rate": 4.902868116274418e-05, "loss": 0.7384456396102905, "step": 2046 }, { "epoch": 2.5116564417177916, "grad_norm": 0.17885418236255646, "learning_rate": 4.9027295208744314e-05, "loss": 0.852887749671936, "step": 2047 }, { "epoch": 2.5128834355828222, "grad_norm": 0.18933556973934174, "learning_rate": 4.902590828627272e-05, "loss": 0.7748652100563049, "step": 2048 }, { "epoch": 2.514110429447853, "grad_norm": 0.22638869285583496, "learning_rate": 4.902452039538526e-05, "loss": 0.9736927151679993, "step": 2049 }, { "epoch": 2.5153374233128836, "grad_norm": 0.24546581506729126, "learning_rate": 4.902313153613792e-05, "loss": 0.8532152771949768, "step": 2050 }, { "epoch": 2.5165644171779142, "grad_norm": 0.20654051005840302, "learning_rate": 4.902174170858666e-05, "loss": 0.8326992392539978, "step": 2051 }, { "epoch": 2.517791411042945, "grad_norm": 0.18855273723602295, "learning_rate": 4.9020350912787504e-05, "loss": 0.9140040874481201, "step": 2052 }, { "epoch": 2.5190184049079756, "grad_norm": 0.2277073860168457, "learning_rate": 4.901895914879651e-05, "loss": 0.7856408357620239, "step": 2053 }, { "epoch": 2.5202453987730062, "grad_norm": 0.2135140299797058, "learning_rate": 4.901756641666978e-05, "loss": 0.8283081650733948, "step": 2054 }, { "epoch": 2.521472392638037, "grad_norm": 0.19049927592277527, "learning_rate": 4.901617271646345e-05, "loss": 0.8459190130233765, "step": 2055 }, { "epoch": 2.5226993865030676, "grad_norm": 0.20091071724891663, "learning_rate": 4.901477804823369e-05, "loss": 1.0222597122192383, "step": 2056 }, { "epoch": 2.5239263803680982, "grad_norm": 0.17776720225811005, "learning_rate": 4.9013382412036715e-05, "loss": 0.9003598093986511, "step": 2057 }, { "epoch": 2.525153374233129, "grad_norm": 0.21362629532814026, "learning_rate": 4.901198580792879e-05, "loss": 0.8284887075424194, "step": 2058 }, { "epoch": 2.5263803680981596, "grad_norm": 0.5273512005805969, "learning_rate": 4.9010588235966196e-05, "loss": 0.8994369506835938, "step": 2059 }, { "epoch": 2.5276073619631902, "grad_norm": 0.19197826087474823, "learning_rate": 4.900918969620527e-05, "loss": 0.7806567549705505, "step": 2060 }, { "epoch": 2.528834355828221, "grad_norm": 0.2074517458677292, "learning_rate": 4.900779018870239e-05, "loss": 0.83880615234375, "step": 2061 }, { "epoch": 2.5300613496932516, "grad_norm": 0.18009090423583984, "learning_rate": 4.900638971351395e-05, "loss": 0.803932785987854, "step": 2062 }, { "epoch": 2.5312883435582823, "grad_norm": 0.2037276327610016, "learning_rate": 4.9004988270696425e-05, "loss": 0.7421091794967651, "step": 2063 }, { "epoch": 2.532515337423313, "grad_norm": 0.20962144434452057, "learning_rate": 4.900358586030628e-05, "loss": 0.7577528953552246, "step": 2064 }, { "epoch": 2.5337423312883436, "grad_norm": 0.19678816199302673, "learning_rate": 4.900218248240004e-05, "loss": 0.9381227493286133, "step": 2065 }, { "epoch": 2.5349693251533743, "grad_norm": 0.1928989291191101, "learning_rate": 4.900077813703429e-05, "loss": 1.0307157039642334, "step": 2066 }, { "epoch": 2.536196319018405, "grad_norm": 0.2295542061328888, "learning_rate": 4.8999372824265624e-05, "loss": 0.7385658025741577, "step": 2067 }, { "epoch": 2.5374233128834356, "grad_norm": 0.181247279047966, "learning_rate": 4.8997966544150683e-05, "loss": 0.9826686382293701, "step": 2068 }, { "epoch": 2.5386503067484663, "grad_norm": 0.2692924439907074, "learning_rate": 4.899655929674617e-05, "loss": 0.8684518933296204, "step": 2069 }, { "epoch": 2.539877300613497, "grad_norm": 0.22047112882137299, "learning_rate": 4.899515108210878e-05, "loss": 0.8963215351104736, "step": 2070 }, { "epoch": 2.5411042944785276, "grad_norm": 0.18863938748836517, "learning_rate": 4.8993741900295284e-05, "loss": 0.8491308093070984, "step": 2071 }, { "epoch": 2.5423312883435583, "grad_norm": 0.18495675921440125, "learning_rate": 4.8992331751362494e-05, "loss": 0.9724169373512268, "step": 2072 }, { "epoch": 2.543558282208589, "grad_norm": 0.17427614331245422, "learning_rate": 4.8990920635367234e-05, "loss": 0.8681283593177795, "step": 2073 }, { "epoch": 2.5447852760736196, "grad_norm": 0.21088635921478271, "learning_rate": 4.898950855236638e-05, "loss": 0.8286991119384766, "step": 2074 }, { "epoch": 2.5460122699386503, "grad_norm": 0.1563616842031479, "learning_rate": 4.898809550241687e-05, "loss": 0.8575851917266846, "step": 2075 }, { "epoch": 2.547239263803681, "grad_norm": 0.21459804475307465, "learning_rate": 4.8986681485575635e-05, "loss": 0.8656426668167114, "step": 2076 }, { "epoch": 2.5484662576687116, "grad_norm": 0.21791817247867584, "learning_rate": 4.898526650189968e-05, "loss": 0.898030698299408, "step": 2077 }, { "epoch": 2.5496932515337423, "grad_norm": 0.17716719210147858, "learning_rate": 4.8983850551446055e-05, "loss": 0.9403637647628784, "step": 2078 }, { "epoch": 2.550920245398773, "grad_norm": 0.20944498479366302, "learning_rate": 4.898243363427181e-05, "loss": 0.8322556614875793, "step": 2079 }, { "epoch": 2.5521472392638036, "grad_norm": 0.17912760376930237, "learning_rate": 4.898101575043407e-05, "loss": 0.8298505544662476, "step": 2080 }, { "epoch": 2.5533742331288343, "grad_norm": 0.15915153920650482, "learning_rate": 4.8979596899989966e-05, "loss": 0.9467607140541077, "step": 2081 }, { "epoch": 2.554601226993865, "grad_norm": 0.16296881437301636, "learning_rate": 4.8978177082996715e-05, "loss": 1.0075056552886963, "step": 2082 }, { "epoch": 2.5558282208588956, "grad_norm": 0.20689056813716888, "learning_rate": 4.8976756299511536e-05, "loss": 0.9281057119369507, "step": 2083 }, { "epoch": 2.5570552147239263, "grad_norm": 0.20468544960021973, "learning_rate": 4.897533454959169e-05, "loss": 0.907701313495636, "step": 2084 }, { "epoch": 2.558282208588957, "grad_norm": 0.17051304876804352, "learning_rate": 4.8973911833294485e-05, "loss": 0.7637481689453125, "step": 2085 }, { "epoch": 2.5595092024539876, "grad_norm": 0.22565177083015442, "learning_rate": 4.8972488150677274e-05, "loss": 0.9151718616485596, "step": 2086 }, { "epoch": 2.5607361963190183, "grad_norm": 0.21783442795276642, "learning_rate": 4.897106350179744e-05, "loss": 0.8126559853553772, "step": 2087 }, { "epoch": 2.561963190184049, "grad_norm": 0.19731682538986206, "learning_rate": 4.8969637886712396e-05, "loss": 0.8889625072479248, "step": 2088 }, { "epoch": 2.5631901840490796, "grad_norm": 0.1807175874710083, "learning_rate": 4.8968211305479613e-05, "loss": 0.8461199998855591, "step": 2089 }, { "epoch": 2.5644171779141103, "grad_norm": 0.1764141172170639, "learning_rate": 4.89667837581566e-05, "loss": 0.8645211458206177, "step": 2090 }, { "epoch": 2.565644171779141, "grad_norm": 0.18373072147369385, "learning_rate": 4.8965355244800884e-05, "loss": 0.7773918509483337, "step": 2091 }, { "epoch": 2.5668711656441716, "grad_norm": 0.22071273624897003, "learning_rate": 4.896392576547005e-05, "loss": 0.7842212915420532, "step": 2092 }, { "epoch": 2.5680981595092023, "grad_norm": 0.2177460938692093, "learning_rate": 4.896249532022171e-05, "loss": 0.8737887740135193, "step": 2093 }, { "epoch": 2.5693251533742334, "grad_norm": 0.22186866402626038, "learning_rate": 4.896106390911354e-05, "loss": 0.8432691097259521, "step": 2094 }, { "epoch": 2.5705521472392636, "grad_norm": 0.23065446317195892, "learning_rate": 4.895963153220322e-05, "loss": 0.8714562654495239, "step": 2095 }, { "epoch": 2.5717791411042947, "grad_norm": 0.18951007723808289, "learning_rate": 4.895819818954847e-05, "loss": 0.9334316253662109, "step": 2096 }, { "epoch": 2.573006134969325, "grad_norm": 0.18063920736312866, "learning_rate": 4.89567638812071e-05, "loss": 0.8868029117584229, "step": 2097 }, { "epoch": 2.574233128834356, "grad_norm": 0.20584771037101746, "learning_rate": 4.895532860723691e-05, "loss": 0.6412003040313721, "step": 2098 }, { "epoch": 2.5754601226993863, "grad_norm": 0.22890526056289673, "learning_rate": 4.895389236769573e-05, "loss": 0.8235468864440918, "step": 2099 }, { "epoch": 2.5766871165644174, "grad_norm": 0.17780819535255432, "learning_rate": 4.895245516264147e-05, "loss": 0.7819107174873352, "step": 2100 }, { "epoch": 2.5779141104294476, "grad_norm": 0.19428414106369019, "learning_rate": 4.8951016992132066e-05, "loss": 0.7893627882003784, "step": 2101 }, { "epoch": 2.5791411042944787, "grad_norm": 0.23378399014472961, "learning_rate": 4.894957785622547e-05, "loss": 0.8289810419082642, "step": 2102 }, { "epoch": 2.580368098159509, "grad_norm": 0.18586359918117523, "learning_rate": 4.89481377549797e-05, "loss": 0.8146857619285583, "step": 2103 }, { "epoch": 2.58159509202454, "grad_norm": 0.2082967460155487, "learning_rate": 4.89466966884528e-05, "loss": 0.7969638109207153, "step": 2104 }, { "epoch": 2.5828220858895703, "grad_norm": 0.16244399547576904, "learning_rate": 4.8945254656702856e-05, "loss": 0.9275882840156555, "step": 2105 }, { "epoch": 2.5840490797546014, "grad_norm": 0.20846295356750488, "learning_rate": 4.894381165978799e-05, "loss": 0.820874810218811, "step": 2106 }, { "epoch": 2.5852760736196316, "grad_norm": 0.25942447781562805, "learning_rate": 4.894236769776636e-05, "loss": 0.8319500684738159, "step": 2107 }, { "epoch": 2.5865030674846627, "grad_norm": 0.18663345277309418, "learning_rate": 4.8940922770696174e-05, "loss": 0.9396275877952576, "step": 2108 }, { "epoch": 2.5877300613496934, "grad_norm": 0.17784513533115387, "learning_rate": 4.893947687863568e-05, "loss": 0.8568615317344666, "step": 2109 }, { "epoch": 2.588957055214724, "grad_norm": 0.21521048247814178, "learning_rate": 4.893803002164314e-05, "loss": 0.7433035373687744, "step": 2110 }, { "epoch": 2.5901840490797547, "grad_norm": 0.20134396851062775, "learning_rate": 4.8936582199776894e-05, "loss": 0.8254938125610352, "step": 2111 }, { "epoch": 2.5914110429447854, "grad_norm": 0.18238221108913422, "learning_rate": 4.893513341309529e-05, "loss": 0.8887635469436646, "step": 2112 }, { "epoch": 2.592638036809816, "grad_norm": 0.17755798995494843, "learning_rate": 4.893368366165671e-05, "loss": 0.9204573631286621, "step": 2113 }, { "epoch": 2.5938650306748468, "grad_norm": 0.1785050928592682, "learning_rate": 4.8932232945519616e-05, "loss": 1.0356078147888184, "step": 2114 }, { "epoch": 2.5950920245398774, "grad_norm": 0.19631963968276978, "learning_rate": 4.893078126474246e-05, "loss": 0.8445999026298523, "step": 2115 }, { "epoch": 2.596319018404908, "grad_norm": 0.24447309970855713, "learning_rate": 4.892932861938377e-05, "loss": 0.8768434524536133, "step": 2116 }, { "epoch": 2.5975460122699388, "grad_norm": 0.2120630145072937, "learning_rate": 4.892787500950209e-05, "loss": 0.815667986869812, "step": 2117 }, { "epoch": 2.5987730061349694, "grad_norm": 0.19348298013210297, "learning_rate": 4.8926420435156015e-05, "loss": 0.9299864768981934, "step": 2118 }, { "epoch": 2.6, "grad_norm": 0.18332335352897644, "learning_rate": 4.892496489640417e-05, "loss": 0.8864060640335083, "step": 2119 }, { "epoch": 2.6012269938650308, "grad_norm": 0.24235974252223969, "learning_rate": 4.892350839330522e-05, "loss": 0.7790302634239197, "step": 2120 }, { "epoch": 2.6024539877300614, "grad_norm": 0.1796402782201767, "learning_rate": 4.892205092591789e-05, "loss": 0.8443567752838135, "step": 2121 }, { "epoch": 2.603680981595092, "grad_norm": 0.21821044385433197, "learning_rate": 4.892059249430091e-05, "loss": 0.8037298321723938, "step": 2122 }, { "epoch": 2.6049079754601228, "grad_norm": 0.1880233734846115, "learning_rate": 4.891913309851307e-05, "loss": 0.8413205146789551, "step": 2123 }, { "epoch": 2.6061349693251534, "grad_norm": 0.19062143564224243, "learning_rate": 4.8917672738613195e-05, "loss": 0.9705996513366699, "step": 2124 }, { "epoch": 2.607361963190184, "grad_norm": 0.2220519781112671, "learning_rate": 4.891621141466014e-05, "loss": 0.812179684638977, "step": 2125 }, { "epoch": 2.6085889570552148, "grad_norm": 0.2326146364212036, "learning_rate": 4.8914749126712824e-05, "loss": 0.6808260083198547, "step": 2126 }, { "epoch": 2.6098159509202454, "grad_norm": 0.20479030907154083, "learning_rate": 4.8913285874830174e-05, "loss": 0.7931845188140869, "step": 2127 }, { "epoch": 2.611042944785276, "grad_norm": 0.2361782193183899, "learning_rate": 4.891182165907118e-05, "loss": 0.7751710414886475, "step": 2128 }, { "epoch": 2.6122699386503068, "grad_norm": 0.18812520802021027, "learning_rate": 4.8910356479494846e-05, "loss": 0.9536247253417969, "step": 2129 }, { "epoch": 2.6134969325153374, "grad_norm": 0.15863393247127533, "learning_rate": 4.890889033616023e-05, "loss": 0.9343034029006958, "step": 2130 }, { "epoch": 2.614723926380368, "grad_norm": 0.19211384654045105, "learning_rate": 4.8907423229126446e-05, "loss": 0.8461875319480896, "step": 2131 }, { "epoch": 2.6159509202453988, "grad_norm": 0.1802280694246292, "learning_rate": 4.8905955158452616e-05, "loss": 0.8902325630187988, "step": 2132 }, { "epoch": 2.6171779141104294, "grad_norm": 0.21130028367042542, "learning_rate": 4.8904486124197914e-05, "loss": 0.912463903427124, "step": 2133 }, { "epoch": 2.61840490797546, "grad_norm": 0.17178046703338623, "learning_rate": 4.890301612642155e-05, "loss": 0.888637900352478, "step": 2134 }, { "epoch": 2.6196319018404908, "grad_norm": 0.19447387754917145, "learning_rate": 4.8901545165182784e-05, "loss": 1.0250293016433716, "step": 2135 }, { "epoch": 2.6208588957055214, "grad_norm": 0.17529600858688354, "learning_rate": 4.89000732405409e-05, "loss": 0.840640127658844, "step": 2136 }, { "epoch": 2.622085889570552, "grad_norm": 0.18345537781715393, "learning_rate": 4.889860035255524e-05, "loss": 0.7968930602073669, "step": 2137 }, { "epoch": 2.6233128834355828, "grad_norm": 0.2010500431060791, "learning_rate": 4.8897126501285144e-05, "loss": 0.8279755115509033, "step": 2138 }, { "epoch": 2.6245398773006134, "grad_norm": 0.1945076882839203, "learning_rate": 4.889565168679005e-05, "loss": 0.9290078282356262, "step": 2139 }, { "epoch": 2.625766871165644, "grad_norm": 0.16497471928596497, "learning_rate": 4.889417590912937e-05, "loss": 0.7340508699417114, "step": 2140 }, { "epoch": 2.626993865030675, "grad_norm": 0.197983518242836, "learning_rate": 4.889269916836262e-05, "loss": 0.9819680452346802, "step": 2141 }, { "epoch": 2.6282208588957054, "grad_norm": 0.20090460777282715, "learning_rate": 4.8891221464549305e-05, "loss": 0.8106812238693237, "step": 2142 }, { "epoch": 2.629447852760736, "grad_norm": 0.19096256792545319, "learning_rate": 4.888974279774901e-05, "loss": 0.7854228615760803, "step": 2143 }, { "epoch": 2.630674846625767, "grad_norm": 0.2064702808856964, "learning_rate": 4.88882631680213e-05, "loss": 0.771173894405365, "step": 2144 }, { "epoch": 2.6319018404907975, "grad_norm": 0.17560824751853943, "learning_rate": 4.888678257542584e-05, "loss": 0.9284349679946899, "step": 2145 }, { "epoch": 2.633128834355828, "grad_norm": 0.1599004566669464, "learning_rate": 4.88853010200223e-05, "loss": 0.9643133878707886, "step": 2146 }, { "epoch": 2.634355828220859, "grad_norm": 0.20923644304275513, "learning_rate": 4.888381850187039e-05, "loss": 0.8747138381004333, "step": 2147 }, { "epoch": 2.6355828220858895, "grad_norm": 0.17952343821525574, "learning_rate": 4.888233502102989e-05, "loss": 0.7367474436759949, "step": 2148 }, { "epoch": 2.63680981595092, "grad_norm": 0.22054831683635712, "learning_rate": 4.888085057756058e-05, "loss": 0.7340694665908813, "step": 2149 }, { "epoch": 2.638036809815951, "grad_norm": 0.2087232768535614, "learning_rate": 4.8879365171522284e-05, "loss": 0.8547459840774536, "step": 2150 }, { "epoch": 2.6392638036809815, "grad_norm": 0.17673425376415253, "learning_rate": 4.887787880297488e-05, "loss": 0.9098749756813049, "step": 2151 }, { "epoch": 2.640490797546012, "grad_norm": 0.17150408029556274, "learning_rate": 4.88763914719783e-05, "loss": 0.9453627467155457, "step": 2152 }, { "epoch": 2.641717791411043, "grad_norm": 0.19981573522090912, "learning_rate": 4.887490317859247e-05, "loss": 0.820220947265625, "step": 2153 }, { "epoch": 2.6429447852760735, "grad_norm": 0.18095733225345612, "learning_rate": 4.8873413922877376e-05, "loss": 0.9040679931640625, "step": 2154 }, { "epoch": 2.644171779141104, "grad_norm": 0.19704513251781464, "learning_rate": 4.8871923704893065e-05, "loss": 1.0878461599349976, "step": 2155 }, { "epoch": 2.645398773006135, "grad_norm": 0.20978949964046478, "learning_rate": 4.8870432524699594e-05, "loss": 0.9868669509887695, "step": 2156 }, { "epoch": 2.646625766871166, "grad_norm": 0.17507952451705933, "learning_rate": 4.886894038235707e-05, "loss": 0.901351273059845, "step": 2157 }, { "epoch": 2.647852760736196, "grad_norm": 0.20974405109882355, "learning_rate": 4.886744727792564e-05, "loss": 0.7487045526504517, "step": 2158 }, { "epoch": 2.6490797546012272, "grad_norm": 0.19282297790050507, "learning_rate": 4.886595321146547e-05, "loss": 0.9191405773162842, "step": 2159 }, { "epoch": 2.6503067484662575, "grad_norm": 0.23572853207588196, "learning_rate": 4.88644581830368e-05, "loss": 0.7812361121177673, "step": 2160 }, { "epoch": 2.6515337423312886, "grad_norm": 0.1950775384902954, "learning_rate": 4.886296219269988e-05, "loss": 0.8905704617500305, "step": 2161 }, { "epoch": 2.652760736196319, "grad_norm": 0.21113476157188416, "learning_rate": 4.886146524051502e-05, "loss": 0.8653755187988281, "step": 2162 }, { "epoch": 2.65398773006135, "grad_norm": 0.20761729776859283, "learning_rate": 4.885996732654255e-05, "loss": 0.8286672830581665, "step": 2163 }, { "epoch": 2.65521472392638, "grad_norm": 0.22993183135986328, "learning_rate": 4.885846845084284e-05, "loss": 0.7626239061355591, "step": 2164 }, { "epoch": 2.6564417177914113, "grad_norm": 0.1817263960838318, "learning_rate": 4.885696861347633e-05, "loss": 0.8000770807266235, "step": 2165 }, { "epoch": 2.6576687116564415, "grad_norm": 0.22593460977077484, "learning_rate": 4.885546781450344e-05, "loss": 0.8200019598007202, "step": 2166 }, { "epoch": 2.6588957055214726, "grad_norm": 0.2192344218492508, "learning_rate": 4.885396605398469e-05, "loss": 0.7990720272064209, "step": 2167 }, { "epoch": 2.660122699386503, "grad_norm": 0.22016172111034393, "learning_rate": 4.8852463331980604e-05, "loss": 0.8872407674789429, "step": 2168 }, { "epoch": 2.661349693251534, "grad_norm": 0.19663956761360168, "learning_rate": 4.885095964855174e-05, "loss": 0.8525274991989136, "step": 2169 }, { "epoch": 2.662576687116564, "grad_norm": 0.20318928360939026, "learning_rate": 4.884945500375872e-05, "loss": 0.8744674921035767, "step": 2170 }, { "epoch": 2.6638036809815953, "grad_norm": 0.1876051127910614, "learning_rate": 4.884794939766219e-05, "loss": 0.7916367650032043, "step": 2171 }, { "epoch": 2.665030674846626, "grad_norm": 0.1748276948928833, "learning_rate": 4.8846442830322846e-05, "loss": 0.9620886445045471, "step": 2172 }, { "epoch": 2.6662576687116566, "grad_norm": 0.19209884107112885, "learning_rate": 4.8844935301801396e-05, "loss": 0.8356779217720032, "step": 2173 }, { "epoch": 2.6674846625766873, "grad_norm": 0.16892731189727783, "learning_rate": 4.884342681215861e-05, "loss": 0.7643305063247681, "step": 2174 }, { "epoch": 2.668711656441718, "grad_norm": 0.18228530883789062, "learning_rate": 4.88419173614553e-05, "loss": 0.8923555612564087, "step": 2175 }, { "epoch": 2.6699386503067486, "grad_norm": 0.2176167517900467, "learning_rate": 4.884040694975229e-05, "loss": 0.9572474956512451, "step": 2176 }, { "epoch": 2.6711656441717793, "grad_norm": 0.22468949854373932, "learning_rate": 4.883889557711048e-05, "loss": 0.8336511850357056, "step": 2177 }, { "epoch": 2.67239263803681, "grad_norm": 0.18801960349082947, "learning_rate": 4.883738324359078e-05, "loss": 0.8592239618301392, "step": 2178 }, { "epoch": 2.6736196319018406, "grad_norm": 0.21825376152992249, "learning_rate": 4.883586994925414e-05, "loss": 0.9351089596748352, "step": 2179 }, { "epoch": 2.6748466257668713, "grad_norm": 0.2351728081703186, "learning_rate": 4.883435569416157e-05, "loss": 0.6768011450767517, "step": 2180 }, { "epoch": 2.676073619631902, "grad_norm": 0.23322206735610962, "learning_rate": 4.8832840478374095e-05, "loss": 0.7513136863708496, "step": 2181 }, { "epoch": 2.6773006134969326, "grad_norm": 0.19421154260635376, "learning_rate": 4.8831324301952806e-05, "loss": 0.8676031231880188, "step": 2182 }, { "epoch": 2.6785276073619633, "grad_norm": 0.1859624683856964, "learning_rate": 4.882980716495879e-05, "loss": 0.9329171180725098, "step": 2183 }, { "epoch": 2.679754601226994, "grad_norm": 0.2211606800556183, "learning_rate": 4.882828906745322e-05, "loss": 0.7412701845169067, "step": 2184 }, { "epoch": 2.6809815950920246, "grad_norm": 0.15743598341941833, "learning_rate": 4.8826770009497275e-05, "loss": 0.8972375392913818, "step": 2185 }, { "epoch": 2.6822085889570553, "grad_norm": 0.23886145651340485, "learning_rate": 4.882524999115219e-05, "loss": 0.9274312257766724, "step": 2186 }, { "epoch": 2.683435582822086, "grad_norm": 0.18998771905899048, "learning_rate": 4.8823729012479225e-05, "loss": 0.8396565914154053, "step": 2187 }, { "epoch": 2.6846625766871166, "grad_norm": 0.2325761318206787, "learning_rate": 4.8822207073539686e-05, "loss": 0.7330549955368042, "step": 2188 }, { "epoch": 2.6858895705521473, "grad_norm": 0.18124765157699585, "learning_rate": 4.882068417439493e-05, "loss": 0.9418120384216309, "step": 2189 }, { "epoch": 2.687116564417178, "grad_norm": 0.22356531023979187, "learning_rate": 4.881916031510634e-05, "loss": 0.6768861413002014, "step": 2190 }, { "epoch": 2.6883435582822086, "grad_norm": 0.18261486291885376, "learning_rate": 4.8817635495735317e-05, "loss": 0.842139720916748, "step": 2191 }, { "epoch": 2.6895705521472393, "grad_norm": 0.21174703538417816, "learning_rate": 4.881610971634335e-05, "loss": 0.7814397811889648, "step": 2192 }, { "epoch": 2.69079754601227, "grad_norm": 0.19916954636573792, "learning_rate": 4.881458297699192e-05, "loss": 0.8022189736366272, "step": 2193 }, { "epoch": 2.6920245398773006, "grad_norm": 0.21138232946395874, "learning_rate": 4.881305527774257e-05, "loss": 0.8085658550262451, "step": 2194 }, { "epoch": 2.6932515337423313, "grad_norm": 0.2119070440530777, "learning_rate": 4.881152661865688e-05, "loss": 0.7634268999099731, "step": 2195 }, { "epoch": 2.694478527607362, "grad_norm": 0.19265305995941162, "learning_rate": 4.880999699979647e-05, "loss": 0.9467224478721619, "step": 2196 }, { "epoch": 2.6957055214723926, "grad_norm": 0.17736651003360748, "learning_rate": 4.8808466421222985e-05, "loss": 0.7727936506271362, "step": 2197 }, { "epoch": 2.6969325153374233, "grad_norm": 0.1741858869791031, "learning_rate": 4.8806934882998124e-05, "loss": 0.9088022708892822, "step": 2198 }, { "epoch": 2.698159509202454, "grad_norm": 0.18295343220233917, "learning_rate": 4.8805402385183616e-05, "loss": 0.7796943187713623, "step": 2199 }, { "epoch": 2.6993865030674846, "grad_norm": 0.19120875000953674, "learning_rate": 4.880386892784123e-05, "loss": 0.8021968007087708, "step": 2200 }, { "epoch": 2.7006134969325153, "grad_norm": 0.21358685195446014, "learning_rate": 4.880233451103278e-05, "loss": 0.9328338503837585, "step": 2201 }, { "epoch": 2.701840490797546, "grad_norm": 0.25600048899650574, "learning_rate": 4.880079913482012e-05, "loss": 0.6770164370536804, "step": 2202 }, { "epoch": 2.7030674846625766, "grad_norm": 0.17709791660308838, "learning_rate": 4.879926279926512e-05, "loss": 0.9567409753799438, "step": 2203 }, { "epoch": 2.7042944785276073, "grad_norm": 0.18265829980373383, "learning_rate": 4.879772550442971e-05, "loss": 0.8886251449584961, "step": 2204 }, { "epoch": 2.705521472392638, "grad_norm": 0.17337091267108917, "learning_rate": 4.879618725037587e-05, "loss": 0.9592123031616211, "step": 2205 }, { "epoch": 2.7067484662576686, "grad_norm": 0.24552257359027863, "learning_rate": 4.879464803716558e-05, "loss": 0.8195731043815613, "step": 2206 }, { "epoch": 2.7079754601226993, "grad_norm": 0.19919683039188385, "learning_rate": 4.87931078648609e-05, "loss": 0.8304147720336914, "step": 2207 }, { "epoch": 2.70920245398773, "grad_norm": 0.21945630013942719, "learning_rate": 4.87915667335239e-05, "loss": 0.834896445274353, "step": 2208 }, { "epoch": 2.7104294478527606, "grad_norm": 0.20022521913051605, "learning_rate": 4.8790024643216706e-05, "loss": 0.7781981229782104, "step": 2209 }, { "epoch": 2.7116564417177913, "grad_norm": 0.20311230421066284, "learning_rate": 4.8788481594001455e-05, "loss": 1.0371822118759155, "step": 2210 }, { "epoch": 2.712883435582822, "grad_norm": 0.185429185628891, "learning_rate": 4.8786937585940374e-05, "loss": 0.9479222893714905, "step": 2211 }, { "epoch": 2.7141104294478526, "grad_norm": 0.2027137726545334, "learning_rate": 4.878539261909567e-05, "loss": 0.8837881684303284, "step": 2212 }, { "epoch": 2.7153374233128833, "grad_norm": 0.23354285955429077, "learning_rate": 4.8783846693529636e-05, "loss": 0.8094651103019714, "step": 2213 }, { "epoch": 2.716564417177914, "grad_norm": 0.1873319447040558, "learning_rate": 4.878229980930457e-05, "loss": 0.8848745822906494, "step": 2214 }, { "epoch": 2.7177914110429446, "grad_norm": 0.22621993720531464, "learning_rate": 4.878075196648283e-05, "loss": 0.8445760607719421, "step": 2215 }, { "epoch": 2.7190184049079753, "grad_norm": 0.26639699935913086, "learning_rate": 4.87792031651268e-05, "loss": 0.6714518666267395, "step": 2216 }, { "epoch": 2.720245398773006, "grad_norm": 0.25695472955703735, "learning_rate": 4.877765340529892e-05, "loss": 0.7578913569450378, "step": 2217 }, { "epoch": 2.721472392638037, "grad_norm": 0.1806982457637787, "learning_rate": 4.877610268706163e-05, "loss": 0.8514130115509033, "step": 2218 }, { "epoch": 2.7226993865030673, "grad_norm": 0.20442408323287964, "learning_rate": 4.8774551010477464e-05, "loss": 0.8465256690979004, "step": 2219 }, { "epoch": 2.7239263803680984, "grad_norm": 0.1769016832113266, "learning_rate": 4.877299837560896e-05, "loss": 0.7681419849395752, "step": 2220 }, { "epoch": 2.7251533742331286, "grad_norm": 0.16236281394958496, "learning_rate": 4.877144478251868e-05, "loss": 0.9350847005844116, "step": 2221 }, { "epoch": 2.7263803680981598, "grad_norm": 0.18365487456321716, "learning_rate": 4.876989023126927e-05, "loss": 0.8126507997512817, "step": 2222 }, { "epoch": 2.72760736196319, "grad_norm": 0.20312218368053436, "learning_rate": 4.876833472192337e-05, "loss": 0.8943542242050171, "step": 2223 }, { "epoch": 2.728834355828221, "grad_norm": 0.22907423973083496, "learning_rate": 4.8766778254543696e-05, "loss": 0.6795436143875122, "step": 2224 }, { "epoch": 2.7300613496932513, "grad_norm": 0.19791045784950256, "learning_rate": 4.876522082919297e-05, "loss": 0.8549712896347046, "step": 2225 }, { "epoch": 2.7312883435582824, "grad_norm": 0.21174417436122894, "learning_rate": 4.876366244593397e-05, "loss": 0.7055399417877197, "step": 2226 }, { "epoch": 2.7325153374233127, "grad_norm": 0.17059138417243958, "learning_rate": 4.8762103104829516e-05, "loss": 0.9609794616699219, "step": 2227 }, { "epoch": 2.7337423312883438, "grad_norm": 0.24944597482681274, "learning_rate": 4.876054280594246e-05, "loss": 0.620807409286499, "step": 2228 }, { "epoch": 2.734969325153374, "grad_norm": 0.226903036236763, "learning_rate": 4.875898154933569e-05, "loss": 0.8523687124252319, "step": 2229 }, { "epoch": 2.736196319018405, "grad_norm": 0.1901526004076004, "learning_rate": 4.875741933507213e-05, "loss": 0.8313283920288086, "step": 2230 }, { "epoch": 2.7374233128834353, "grad_norm": 0.17444977164268494, "learning_rate": 4.875585616321476e-05, "loss": 0.8128200769424438, "step": 2231 }, { "epoch": 2.7386503067484664, "grad_norm": 0.18426240980625153, "learning_rate": 4.8754292033826586e-05, "loss": 0.7869988679885864, "step": 2232 }, { "epoch": 2.7398773006134967, "grad_norm": 0.1841057389974594, "learning_rate": 4.875272694697065e-05, "loss": 0.8444668650627136, "step": 2233 }, { "epoch": 2.7411042944785278, "grad_norm": 0.20548702776432037, "learning_rate": 4.875116090271003e-05, "loss": 0.9598273038864136, "step": 2234 }, { "epoch": 2.7423312883435584, "grad_norm": 0.1821296662092209, "learning_rate": 4.874959390110785e-05, "loss": 0.8491407632827759, "step": 2235 }, { "epoch": 2.743558282208589, "grad_norm": 0.18204793334007263, "learning_rate": 4.8748025942227284e-05, "loss": 0.8226457834243774, "step": 2236 }, { "epoch": 2.7447852760736198, "grad_norm": 0.1863575130701065, "learning_rate": 4.874645702613152e-05, "loss": 0.8024981021881104, "step": 2237 }, { "epoch": 2.7460122699386504, "grad_norm": 0.18464937806129456, "learning_rate": 4.87448871528838e-05, "loss": 0.8853721618652344, "step": 2238 }, { "epoch": 2.747239263803681, "grad_norm": 0.2640102207660675, "learning_rate": 4.8743316322547404e-05, "loss": 0.6889677047729492, "step": 2239 }, { "epoch": 2.7484662576687118, "grad_norm": 0.21362146735191345, "learning_rate": 4.8741744535185647e-05, "loss": 0.7564080953598022, "step": 2240 }, { "epoch": 2.7496932515337424, "grad_norm": 0.1810959130525589, "learning_rate": 4.8740171790861875e-05, "loss": 0.9742476940155029, "step": 2241 }, { "epoch": 2.750920245398773, "grad_norm": 0.19337241351604462, "learning_rate": 4.8738598089639493e-05, "loss": 0.8318847417831421, "step": 2242 }, { "epoch": 2.752147239263804, "grad_norm": 0.18684065341949463, "learning_rate": 4.873702343158192e-05, "loss": 0.825664758682251, "step": 2243 }, { "epoch": 2.7533742331288344, "grad_norm": 0.21743819117546082, "learning_rate": 4.873544781675264e-05, "loss": 0.8264352083206177, "step": 2244 }, { "epoch": 2.754601226993865, "grad_norm": 0.15842528641223907, "learning_rate": 4.873387124521515e-05, "loss": 0.8846437931060791, "step": 2245 }, { "epoch": 2.755828220858896, "grad_norm": 0.21744461357593536, "learning_rate": 4.873229371703301e-05, "loss": 0.7175818681716919, "step": 2246 }, { "epoch": 2.7570552147239265, "grad_norm": 0.19335415959358215, "learning_rate": 4.873071523226978e-05, "loss": 0.8596925735473633, "step": 2247 }, { "epoch": 2.758282208588957, "grad_norm": 0.1980038285255432, "learning_rate": 4.872913579098911e-05, "loss": 0.8662509918212891, "step": 2248 }, { "epoch": 2.759509202453988, "grad_norm": 0.20970319211483002, "learning_rate": 4.872755539325465e-05, "loss": 0.8842437267303467, "step": 2249 }, { "epoch": 2.7607361963190185, "grad_norm": 0.25263282656669617, "learning_rate": 4.872597403913011e-05, "loss": 0.7961280345916748, "step": 2250 }, { "epoch": 2.761963190184049, "grad_norm": 0.2225959300994873, "learning_rate": 4.872439172867923e-05, "loss": 0.8049070835113525, "step": 2251 }, { "epoch": 2.76319018404908, "grad_norm": 0.1910174936056137, "learning_rate": 4.8722808461965775e-05, "loss": 0.9372878074645996, "step": 2252 }, { "epoch": 2.7644171779141105, "grad_norm": 0.20237833261489868, "learning_rate": 4.872122423905358e-05, "loss": 0.8259061574935913, "step": 2253 }, { "epoch": 2.765644171779141, "grad_norm": 0.2098434418439865, "learning_rate": 4.871963906000648e-05, "loss": 1.008679986000061, "step": 2254 }, { "epoch": 2.766871165644172, "grad_norm": 0.1865292191505432, "learning_rate": 4.871805292488838e-05, "loss": 0.9939478635787964, "step": 2255 }, { "epoch": 2.7680981595092025, "grad_norm": 0.20382489264011383, "learning_rate": 4.871646583376322e-05, "loss": 0.88614821434021, "step": 2256 }, { "epoch": 2.769325153374233, "grad_norm": 0.20887808501720428, "learning_rate": 4.8714877786694956e-05, "loss": 0.825858473777771, "step": 2257 }, { "epoch": 2.770552147239264, "grad_norm": 0.21432863175868988, "learning_rate": 4.871328878374761e-05, "loss": 0.741875410079956, "step": 2258 }, { "epoch": 2.7717791411042945, "grad_norm": 0.16972926259040833, "learning_rate": 4.871169882498523e-05, "loss": 0.9789024591445923, "step": 2259 }, { "epoch": 2.773006134969325, "grad_norm": 0.19091537594795227, "learning_rate": 4.871010791047189e-05, "loss": 0.8910320997238159, "step": 2260 }, { "epoch": 2.774233128834356, "grad_norm": 0.20528443157672882, "learning_rate": 4.870851604027173e-05, "loss": 0.8621368408203125, "step": 2261 }, { "epoch": 2.7754601226993865, "grad_norm": 0.18440629541873932, "learning_rate": 4.8706923214448904e-05, "loss": 1.0602978467941284, "step": 2262 }, { "epoch": 2.776687116564417, "grad_norm": 0.2164248675107956, "learning_rate": 4.870532943306761e-05, "loss": 0.7761361598968506, "step": 2263 }, { "epoch": 2.777914110429448, "grad_norm": 0.2026340216398239, "learning_rate": 4.870373469619211e-05, "loss": 0.9146266579627991, "step": 2264 }, { "epoch": 2.7791411042944785, "grad_norm": 0.1859034150838852, "learning_rate": 4.8702139003886664e-05, "loss": 0.8555178642272949, "step": 2265 }, { "epoch": 2.780368098159509, "grad_norm": 0.168351948261261, "learning_rate": 4.870054235621559e-05, "loss": 0.93840491771698, "step": 2266 }, { "epoch": 2.78159509202454, "grad_norm": 0.22616833448410034, "learning_rate": 4.869894475324326e-05, "loss": 0.829272985458374, "step": 2267 }, { "epoch": 2.7828220858895705, "grad_norm": 0.18747378885746002, "learning_rate": 4.869734619503406e-05, "loss": 0.9504947662353516, "step": 2268 }, { "epoch": 2.784049079754601, "grad_norm": 0.18400514125823975, "learning_rate": 4.869574668165241e-05, "loss": 0.870345950126648, "step": 2269 }, { "epoch": 2.785276073619632, "grad_norm": 0.1948077380657196, "learning_rate": 4.869414621316279e-05, "loss": 0.8550519347190857, "step": 2270 }, { "epoch": 2.7865030674846625, "grad_norm": 0.23022542893886566, "learning_rate": 4.869254478962973e-05, "loss": 0.8711827397346497, "step": 2271 }, { "epoch": 2.787730061349693, "grad_norm": 0.18638604879379272, "learning_rate": 4.8690942411117754e-05, "loss": 0.9199835062026978, "step": 2272 }, { "epoch": 2.788957055214724, "grad_norm": 0.16813714802265167, "learning_rate": 4.868933907769146e-05, "loss": 1.0560388565063477, "step": 2273 }, { "epoch": 2.7901840490797545, "grad_norm": 0.19344636797904968, "learning_rate": 4.8687734789415474e-05, "loss": 0.8511008024215698, "step": 2274 }, { "epoch": 2.791411042944785, "grad_norm": 0.21760523319244385, "learning_rate": 4.868612954635445e-05, "loss": 0.7900312542915344, "step": 2275 }, { "epoch": 2.792638036809816, "grad_norm": 0.18076631426811218, "learning_rate": 4.868452334857311e-05, "loss": 0.9010727405548096, "step": 2276 }, { "epoch": 2.7938650306748465, "grad_norm": 0.2317585051059723, "learning_rate": 4.8682916196136176e-05, "loss": 0.729763925075531, "step": 2277 }, { "epoch": 2.795092024539877, "grad_norm": 0.1762486845254898, "learning_rate": 4.8681308089108435e-05, "loss": 0.7577584981918335, "step": 2278 }, { "epoch": 2.796319018404908, "grad_norm": 0.22943581640720367, "learning_rate": 4.867969902755471e-05, "loss": 0.8376009464263916, "step": 2279 }, { "epoch": 2.7975460122699385, "grad_norm": 0.18616993725299835, "learning_rate": 4.8678089011539856e-05, "loss": 0.7970684170722961, "step": 2280 }, { "epoch": 2.7987730061349696, "grad_norm": 0.19832581281661987, "learning_rate": 4.867647804112876e-05, "loss": 0.727027416229248, "step": 2281 }, { "epoch": 2.8, "grad_norm": 0.2644442915916443, "learning_rate": 4.867486611638637e-05, "loss": 0.6588122248649597, "step": 2282 }, { "epoch": 2.801226993865031, "grad_norm": 0.21603204309940338, "learning_rate": 4.867325323737765e-05, "loss": 0.9592055082321167, "step": 2283 }, { "epoch": 2.802453987730061, "grad_norm": 0.20020648837089539, "learning_rate": 4.86716394041676e-05, "loss": 0.8336389064788818, "step": 2284 }, { "epoch": 2.8036809815950923, "grad_norm": 0.19504986703395844, "learning_rate": 4.867002461682128e-05, "loss": 0.9253360033035278, "step": 2285 }, { "epoch": 2.8049079754601225, "grad_norm": 0.18615242838859558, "learning_rate": 4.866840887540378e-05, "loss": 0.7370775938034058, "step": 2286 }, { "epoch": 2.8061349693251536, "grad_norm": 0.1831006556749344, "learning_rate": 4.866679217998023e-05, "loss": 0.7993668913841248, "step": 2287 }, { "epoch": 2.807361963190184, "grad_norm": 0.2227163463830948, "learning_rate": 4.866517453061578e-05, "loss": 0.7085292339324951, "step": 2288 }, { "epoch": 2.808588957055215, "grad_norm": 0.1983010172843933, "learning_rate": 4.866355592737564e-05, "loss": 0.7953518033027649, "step": 2289 }, { "epoch": 2.809815950920245, "grad_norm": 0.20538492500782013, "learning_rate": 4.866193637032505e-05, "loss": 0.7498536705970764, "step": 2290 }, { "epoch": 2.8110429447852763, "grad_norm": 0.22306470572948456, "learning_rate": 4.866031585952929e-05, "loss": 0.7319766879081726, "step": 2291 }, { "epoch": 2.8122699386503065, "grad_norm": 0.20153065025806427, "learning_rate": 4.8658694395053686e-05, "loss": 0.9609556794166565, "step": 2292 }, { "epoch": 2.8134969325153376, "grad_norm": 0.1828504502773285, "learning_rate": 4.865707197696358e-05, "loss": 0.8316512107849121, "step": 2293 }, { "epoch": 2.814723926380368, "grad_norm": 0.17556749284267426, "learning_rate": 4.8655448605324384e-05, "loss": 0.7914236783981323, "step": 2294 }, { "epoch": 2.815950920245399, "grad_norm": 0.2093363106250763, "learning_rate": 4.865382428020151e-05, "loss": 0.8628669381141663, "step": 2295 }, { "epoch": 2.817177914110429, "grad_norm": 0.21873699128627777, "learning_rate": 4.865219900166045e-05, "loss": 0.7857377529144287, "step": 2296 }, { "epoch": 2.8184049079754603, "grad_norm": 0.21645843982696533, "learning_rate": 4.86505727697667e-05, "loss": 0.9862703084945679, "step": 2297 }, { "epoch": 2.819631901840491, "grad_norm": 0.1903757005929947, "learning_rate": 4.8648945584585824e-05, "loss": 0.8080551028251648, "step": 2298 }, { "epoch": 2.8208588957055216, "grad_norm": 0.22767390310764313, "learning_rate": 4.864731744618339e-05, "loss": 0.7388708591461182, "step": 2299 }, { "epoch": 2.8220858895705523, "grad_norm": 0.2006775587797165, "learning_rate": 4.864568835462505e-05, "loss": 0.8503235578536987, "step": 2300 }, { "epoch": 2.823312883435583, "grad_norm": 0.19984549283981323, "learning_rate": 4.864405830997644e-05, "loss": 0.9309889674186707, "step": 2301 }, { "epoch": 2.8245398773006136, "grad_norm": 0.22326242923736572, "learning_rate": 4.864242731230327e-05, "loss": 1.0130668878555298, "step": 2302 }, { "epoch": 2.8257668711656443, "grad_norm": 0.21382533013820648, "learning_rate": 4.86407953616713e-05, "loss": 1.1443579196929932, "step": 2303 }, { "epoch": 2.826993865030675, "grad_norm": 0.17931216955184937, "learning_rate": 4.863916245814628e-05, "loss": 0.9330106973648071, "step": 2304 }, { "epoch": 2.8282208588957056, "grad_norm": 0.19510099291801453, "learning_rate": 4.863752860179405e-05, "loss": 0.8014877438545227, "step": 2305 }, { "epoch": 2.8294478527607363, "grad_norm": 0.2128099799156189, "learning_rate": 4.8635893792680456e-05, "loss": 0.8685052990913391, "step": 2306 }, { "epoch": 2.830674846625767, "grad_norm": 0.16358204185962677, "learning_rate": 4.8634258030871405e-05, "loss": 0.7672951817512512, "step": 2307 }, { "epoch": 2.8319018404907976, "grad_norm": 0.1618814468383789, "learning_rate": 4.86326213164328e-05, "loss": 0.8740593194961548, "step": 2308 }, { "epoch": 2.8331288343558283, "grad_norm": 0.225938618183136, "learning_rate": 4.863098364943065e-05, "loss": 0.8185594081878662, "step": 2309 }, { "epoch": 2.834355828220859, "grad_norm": 0.24764502048492432, "learning_rate": 4.862934502993094e-05, "loss": 0.6428978443145752, "step": 2310 }, { "epoch": 2.8355828220858896, "grad_norm": 0.18329966068267822, "learning_rate": 4.862770545799973e-05, "loss": 0.7036811113357544, "step": 2311 }, { "epoch": 2.8368098159509203, "grad_norm": 0.207442045211792, "learning_rate": 4.862606493370309e-05, "loss": 0.8433369398117065, "step": 2312 }, { "epoch": 2.838036809815951, "grad_norm": 0.17528177797794342, "learning_rate": 4.862442345710716e-05, "loss": 0.7858867645263672, "step": 2313 }, { "epoch": 2.8392638036809816, "grad_norm": 0.21563221514225006, "learning_rate": 4.86227810282781e-05, "loss": 0.8398888111114502, "step": 2314 }, { "epoch": 2.8404907975460123, "grad_norm": 0.20079122483730316, "learning_rate": 4.8621137647282116e-05, "loss": 0.7629067897796631, "step": 2315 }, { "epoch": 2.841717791411043, "grad_norm": 0.20982979238033295, "learning_rate": 4.861949331418544e-05, "loss": 0.9108778238296509, "step": 2316 }, { "epoch": 2.8429447852760736, "grad_norm": 0.17790593206882477, "learning_rate": 4.8617848029054354e-05, "loss": 0.873334527015686, "step": 2317 }, { "epoch": 2.8441717791411043, "grad_norm": 0.2543090879917145, "learning_rate": 4.861620179195517e-05, "loss": 0.8364248871803284, "step": 2318 }, { "epoch": 2.845398773006135, "grad_norm": 0.21321943402290344, "learning_rate": 4.861455460295425e-05, "loss": 0.7920362949371338, "step": 2319 }, { "epoch": 2.8466257668711656, "grad_norm": 0.4238658547401428, "learning_rate": 4.8612906462117994e-05, "loss": 0.7821420431137085, "step": 2320 }, { "epoch": 2.8478527607361963, "grad_norm": 0.2544320225715637, "learning_rate": 4.8611257369512806e-05, "loss": 0.7372010946273804, "step": 2321 }, { "epoch": 2.849079754601227, "grad_norm": 0.18470415472984314, "learning_rate": 4.860960732520518e-05, "loss": 0.7842522263526917, "step": 2322 }, { "epoch": 2.8503067484662576, "grad_norm": 0.20463547110557556, "learning_rate": 4.8607956329261635e-05, "loss": 1.015999674797058, "step": 2323 }, { "epoch": 2.8515337423312883, "grad_norm": 0.18137171864509583, "learning_rate": 4.860630438174869e-05, "loss": 0.8599882125854492, "step": 2324 }, { "epoch": 2.852760736196319, "grad_norm": 0.21318857371807098, "learning_rate": 4.8604651482732945e-05, "loss": 0.7243251800537109, "step": 2325 }, { "epoch": 2.8539877300613496, "grad_norm": 0.20090478658676147, "learning_rate": 4.8602997632281025e-05, "loss": 0.9104819297790527, "step": 2326 }, { "epoch": 2.8552147239263803, "grad_norm": 0.19954998791217804, "learning_rate": 4.8601342830459575e-05, "loss": 0.7551555037498474, "step": 2327 }, { "epoch": 2.856441717791411, "grad_norm": 0.2212841659784317, "learning_rate": 4.859968707733532e-05, "loss": 0.9059866666793823, "step": 2328 }, { "epoch": 2.8576687116564417, "grad_norm": 0.19991709291934967, "learning_rate": 4.859803037297499e-05, "loss": 0.8596267700195312, "step": 2329 }, { "epoch": 2.8588957055214723, "grad_norm": 0.17487257719039917, "learning_rate": 4.859637271744535e-05, "loss": 0.911170482635498, "step": 2330 }, { "epoch": 2.860122699386503, "grad_norm": 0.19414977729320526, "learning_rate": 4.859471411081323e-05, "loss": 0.7856262922286987, "step": 2331 }, { "epoch": 2.8613496932515337, "grad_norm": 0.21486344933509827, "learning_rate": 4.8593054553145476e-05, "loss": 0.8100720643997192, "step": 2332 }, { "epoch": 2.8625766871165643, "grad_norm": 0.8146612644195557, "learning_rate": 4.859139404450899e-05, "loss": 0.8804492950439453, "step": 2333 }, { "epoch": 2.863803680981595, "grad_norm": 0.20587903261184692, "learning_rate": 4.8589732584970685e-05, "loss": 0.7820267081260681, "step": 2334 }, { "epoch": 2.8650306748466257, "grad_norm": 0.24400831758975983, "learning_rate": 4.858807017459755e-05, "loss": 0.8196598291397095, "step": 2335 }, { "epoch": 2.8662576687116563, "grad_norm": 0.18768756091594696, "learning_rate": 4.8586406813456564e-05, "loss": 0.9139707088470459, "step": 2336 }, { "epoch": 2.867484662576687, "grad_norm": 0.19731289148330688, "learning_rate": 4.85847425016148e-05, "loss": 0.9312405586242676, "step": 2337 }, { "epoch": 2.8687116564417177, "grad_norm": 0.21797576546669006, "learning_rate": 4.858307723913933e-05, "loss": 0.8825380802154541, "step": 2338 }, { "epoch": 2.8699386503067483, "grad_norm": 0.20857945084571838, "learning_rate": 4.858141102609729e-05, "loss": 0.8598877191543579, "step": 2339 }, { "epoch": 2.871165644171779, "grad_norm": 0.21774710714817047, "learning_rate": 4.857974386255582e-05, "loss": 0.8189966678619385, "step": 2340 }, { "epoch": 2.8723926380368097, "grad_norm": 0.17192105948925018, "learning_rate": 4.857807574858212e-05, "loss": 0.9294134378433228, "step": 2341 }, { "epoch": 2.8736196319018403, "grad_norm": 0.17387856543064117, "learning_rate": 4.8576406684243434e-05, "loss": 0.753828763961792, "step": 2342 }, { "epoch": 2.874846625766871, "grad_norm": 0.2530936598777771, "learning_rate": 4.8574736669607036e-05, "loss": 0.7807598114013672, "step": 2343 }, { "epoch": 2.876073619631902, "grad_norm": 0.15864601731300354, "learning_rate": 4.857306570474025e-05, "loss": 1.0167837142944336, "step": 2344 }, { "epoch": 2.8773006134969323, "grad_norm": 0.17902207374572754, "learning_rate": 4.857139378971041e-05, "loss": 0.7946648597717285, "step": 2345 }, { "epoch": 2.8785276073619634, "grad_norm": 0.20897962152957916, "learning_rate": 4.856972092458492e-05, "loss": 0.8761323094367981, "step": 2346 }, { "epoch": 2.8797546012269937, "grad_norm": 0.2069242000579834, "learning_rate": 4.856804710943119e-05, "loss": 0.7649979591369629, "step": 2347 }, { "epoch": 2.880981595092025, "grad_norm": 0.27195754647254944, "learning_rate": 4.856637234431671e-05, "loss": 0.7161579132080078, "step": 2348 }, { "epoch": 2.882208588957055, "grad_norm": 0.2664297819137573, "learning_rate": 4.856469662930897e-05, "loss": 0.8729324340820312, "step": 2349 }, { "epoch": 2.883435582822086, "grad_norm": 0.2200084626674652, "learning_rate": 4.856301996447552e-05, "loss": 0.8816111087799072, "step": 2350 }, { "epoch": 2.8846625766871163, "grad_norm": 0.18490011990070343, "learning_rate": 4.856134234988394e-05, "loss": 0.8005890846252441, "step": 2351 }, { "epoch": 2.8858895705521475, "grad_norm": 0.19800952076911926, "learning_rate": 4.855966378560185e-05, "loss": 0.864493727684021, "step": 2352 }, { "epoch": 2.8871165644171777, "grad_norm": 0.19569087028503418, "learning_rate": 4.85579842716969e-05, "loss": 0.7598820328712463, "step": 2353 }, { "epoch": 2.888343558282209, "grad_norm": 0.2329026758670807, "learning_rate": 4.85563038082368e-05, "loss": 0.8255558013916016, "step": 2354 }, { "epoch": 2.889570552147239, "grad_norm": 0.2116190493106842, "learning_rate": 4.8554622395289275e-05, "loss": 0.9660906791687012, "step": 2355 }, { "epoch": 2.89079754601227, "grad_norm": 0.19357550144195557, "learning_rate": 4.85529400329221e-05, "loss": 0.7879614233970642, "step": 2356 }, { "epoch": 2.8920245398773003, "grad_norm": 0.20136454701423645, "learning_rate": 4.855125672120309e-05, "loss": 0.8869798183441162, "step": 2357 }, { "epoch": 2.8932515337423315, "grad_norm": 0.17923572659492493, "learning_rate": 4.854957246020009e-05, "loss": 0.843406617641449, "step": 2358 }, { "epoch": 2.894478527607362, "grad_norm": 0.18216949701309204, "learning_rate": 4.854788724998099e-05, "loss": 0.8893895149230957, "step": 2359 }, { "epoch": 2.895705521472393, "grad_norm": 0.19586728513240814, "learning_rate": 4.854620109061372e-05, "loss": 0.8434507250785828, "step": 2360 }, { "epoch": 2.8969325153374235, "grad_norm": 0.18249212205410004, "learning_rate": 4.854451398216624e-05, "loss": 1.0560016632080078, "step": 2361 }, { "epoch": 2.898159509202454, "grad_norm": 0.18344323337078094, "learning_rate": 4.854282592470655e-05, "loss": 0.7456541061401367, "step": 2362 }, { "epoch": 2.899386503067485, "grad_norm": 0.17830757796764374, "learning_rate": 4.8541136918302686e-05, "loss": 1.0747485160827637, "step": 2363 }, { "epoch": 2.9006134969325155, "grad_norm": 0.20086035132408142, "learning_rate": 4.8539446963022736e-05, "loss": 0.7871142029762268, "step": 2364 }, { "epoch": 2.901840490797546, "grad_norm": 0.20834970474243164, "learning_rate": 4.8537756058934826e-05, "loss": 0.8348067998886108, "step": 2365 }, { "epoch": 2.903067484662577, "grad_norm": 0.23086094856262207, "learning_rate": 4.8536064206107096e-05, "loss": 0.5939435958862305, "step": 2366 }, { "epoch": 2.9042944785276075, "grad_norm": 0.188637375831604, "learning_rate": 4.8534371404607745e-05, "loss": 0.8167935609817505, "step": 2367 }, { "epoch": 2.905521472392638, "grad_norm": 0.18494835495948792, "learning_rate": 4.8532677654505e-05, "loss": 0.9631688594818115, "step": 2368 }, { "epoch": 2.906748466257669, "grad_norm": 0.17503798007965088, "learning_rate": 4.8530982955867144e-05, "loss": 0.8232402801513672, "step": 2369 }, { "epoch": 2.9079754601226995, "grad_norm": 0.21527014672756195, "learning_rate": 4.852928730876247e-05, "loss": 0.730875551700592, "step": 2370 }, { "epoch": 2.90920245398773, "grad_norm": 0.17839382588863373, "learning_rate": 4.852759071325934e-05, "loss": 0.9323233366012573, "step": 2371 }, { "epoch": 2.910429447852761, "grad_norm": 0.21817676723003387, "learning_rate": 4.852589316942612e-05, "loss": 0.7875056266784668, "step": 2372 }, { "epoch": 2.9116564417177915, "grad_norm": 0.18433673679828644, "learning_rate": 4.8524194677331254e-05, "loss": 0.8510193824768066, "step": 2373 }, { "epoch": 2.912883435582822, "grad_norm": 0.17930857837200165, "learning_rate": 4.852249523704319e-05, "loss": 0.9048581123352051, "step": 2374 }, { "epoch": 2.914110429447853, "grad_norm": 0.20276638865470886, "learning_rate": 4.852079484863043e-05, "loss": 0.8692147135734558, "step": 2375 }, { "epoch": 2.9153374233128835, "grad_norm": 0.25684741139411926, "learning_rate": 4.8519093512161516e-05, "loss": 0.6906944513320923, "step": 2376 }, { "epoch": 2.916564417177914, "grad_norm": 0.21291400492191315, "learning_rate": 4.851739122770502e-05, "loss": 0.7057817578315735, "step": 2377 }, { "epoch": 2.917791411042945, "grad_norm": 0.22441226243972778, "learning_rate": 4.8515687995329556e-05, "loss": 0.8265827894210815, "step": 2378 }, { "epoch": 2.9190184049079755, "grad_norm": 0.20801959931850433, "learning_rate": 4.851398381510378e-05, "loss": 0.8276580572128296, "step": 2379 }, { "epoch": 2.920245398773006, "grad_norm": 0.2121553122997284, "learning_rate": 4.851227868709638e-05, "loss": 0.907860517501831, "step": 2380 }, { "epoch": 2.921472392638037, "grad_norm": 0.22018493711948395, "learning_rate": 4.851057261137608e-05, "loss": 0.953737735748291, "step": 2381 }, { "epoch": 2.9226993865030675, "grad_norm": 0.20869919657707214, "learning_rate": 4.850886558801166e-05, "loss": 0.8426697850227356, "step": 2382 }, { "epoch": 2.923926380368098, "grad_norm": 0.22641541063785553, "learning_rate": 4.8507157617071916e-05, "loss": 0.9031780958175659, "step": 2383 }, { "epoch": 2.925153374233129, "grad_norm": 0.22530952095985413, "learning_rate": 4.850544869862569e-05, "loss": 0.8415220975875854, "step": 2384 }, { "epoch": 2.9263803680981595, "grad_norm": 0.16556629538536072, "learning_rate": 4.850373883274187e-05, "loss": 0.9058209657669067, "step": 2385 }, { "epoch": 2.92760736196319, "grad_norm": 0.20506824553012848, "learning_rate": 4.850202801948937e-05, "loss": 0.8138816356658936, "step": 2386 }, { "epoch": 2.928834355828221, "grad_norm": 0.1979113668203354, "learning_rate": 4.850031625893715e-05, "loss": 0.9595090746879578, "step": 2387 }, { "epoch": 2.9300613496932515, "grad_norm": 0.2799171209335327, "learning_rate": 4.8498603551154207e-05, "loss": 0.8185895681381226, "step": 2388 }, { "epoch": 2.931288343558282, "grad_norm": 0.1984681338071823, "learning_rate": 4.8496889896209576e-05, "loss": 0.8193260431289673, "step": 2389 }, { "epoch": 2.932515337423313, "grad_norm": 0.18677310645580292, "learning_rate": 4.849517529417232e-05, "loss": 0.8879241943359375, "step": 2390 }, { "epoch": 2.9337423312883435, "grad_norm": 0.18691444396972656, "learning_rate": 4.849345974511157e-05, "loss": 0.9373584985733032, "step": 2391 }, { "epoch": 2.934969325153374, "grad_norm": 0.20434890687465668, "learning_rate": 4.849174324909646e-05, "loss": 0.777310848236084, "step": 2392 }, { "epoch": 2.936196319018405, "grad_norm": 0.2161477655172348, "learning_rate": 4.849002580619618e-05, "loss": 0.938471794128418, "step": 2393 }, { "epoch": 2.9374233128834355, "grad_norm": 0.253162682056427, "learning_rate": 4.8488307416479954e-05, "loss": 0.7362055778503418, "step": 2394 }, { "epoch": 2.938650306748466, "grad_norm": 0.20741362869739532, "learning_rate": 4.8486588080017046e-05, "loss": 0.7944363355636597, "step": 2395 }, { "epoch": 2.939877300613497, "grad_norm": 0.2344476282596588, "learning_rate": 4.848486779687676e-05, "loss": 0.7351897954940796, "step": 2396 }, { "epoch": 2.9411042944785275, "grad_norm": 0.2089030146598816, "learning_rate": 4.848314656712843e-05, "loss": 0.8054323196411133, "step": 2397 }, { "epoch": 2.942331288343558, "grad_norm": 0.20918546617031097, "learning_rate": 4.8481424390841445e-05, "loss": 0.8449022769927979, "step": 2398 }, { "epoch": 2.943558282208589, "grad_norm": 0.2336791306734085, "learning_rate": 4.8479701268085215e-05, "loss": 0.8071671724319458, "step": 2399 }, { "epoch": 2.9447852760736195, "grad_norm": 0.28193745017051697, "learning_rate": 4.847797719892919e-05, "loss": 0.6585954427719116, "step": 2400 }, { "epoch": 2.94601226993865, "grad_norm": 0.210119366645813, "learning_rate": 4.847625218344286e-05, "loss": 0.8948401212692261, "step": 2401 }, { "epoch": 2.947239263803681, "grad_norm": 0.19502267241477966, "learning_rate": 4.847452622169577e-05, "loss": 0.9282833933830261, "step": 2402 }, { "epoch": 2.9484662576687115, "grad_norm": 0.20842581987380981, "learning_rate": 4.8472799313757475e-05, "loss": 0.8219221830368042, "step": 2403 }, { "epoch": 2.949693251533742, "grad_norm": 0.21848897635936737, "learning_rate": 4.847107145969758e-05, "loss": 0.8023135662078857, "step": 2404 }, { "epoch": 2.950920245398773, "grad_norm": 0.18823470175266266, "learning_rate": 4.8469342659585744e-05, "loss": 0.8423912525177002, "step": 2405 }, { "epoch": 2.9521472392638035, "grad_norm": 0.2158244103193283, "learning_rate": 4.8467612913491636e-05, "loss": 0.8986295461654663, "step": 2406 }, { "epoch": 2.9533742331288346, "grad_norm": 0.22020173072814941, "learning_rate": 4.846588222148499e-05, "loss": 0.8497679233551025, "step": 2407 }, { "epoch": 2.954601226993865, "grad_norm": 0.19335298240184784, "learning_rate": 4.8464150583635556e-05, "loss": 0.9180799722671509, "step": 2408 }, { "epoch": 2.955828220858896, "grad_norm": 0.21949659287929535, "learning_rate": 4.8462418000013134e-05, "loss": 0.6633414626121521, "step": 2409 }, { "epoch": 2.957055214723926, "grad_norm": 0.23193715512752533, "learning_rate": 4.8460684470687553e-05, "loss": 0.6916857957839966, "step": 2410 }, { "epoch": 2.9582822085889573, "grad_norm": 0.22026395797729492, "learning_rate": 4.8458949995728706e-05, "loss": 0.8368027210235596, "step": 2411 }, { "epoch": 2.9595092024539875, "grad_norm": 0.2515547573566437, "learning_rate": 4.845721457520648e-05, "loss": 0.8855043053627014, "step": 2412 }, { "epoch": 2.9607361963190186, "grad_norm": 0.19941172003746033, "learning_rate": 4.845547820919084e-05, "loss": 0.8053042888641357, "step": 2413 }, { "epoch": 2.961963190184049, "grad_norm": 0.20813921093940735, "learning_rate": 4.845374089775178e-05, "loss": 0.8079801797866821, "step": 2414 }, { "epoch": 2.96319018404908, "grad_norm": 0.1877204030752182, "learning_rate": 4.8452002640959306e-05, "loss": 0.9245579242706299, "step": 2415 }, { "epoch": 2.96441717791411, "grad_norm": 0.22455088794231415, "learning_rate": 4.845026343888349e-05, "loss": 0.7051551342010498, "step": 2416 }, { "epoch": 2.9656441717791413, "grad_norm": 0.22176781296730042, "learning_rate": 4.844852329159444e-05, "loss": 0.9030723571777344, "step": 2417 }, { "epoch": 2.9668711656441715, "grad_norm": 0.2487548589706421, "learning_rate": 4.84467821991623e-05, "loss": 0.745032787322998, "step": 2418 }, { "epoch": 2.9680981595092026, "grad_norm": 0.20266790688037872, "learning_rate": 4.844504016165723e-05, "loss": 0.862288236618042, "step": 2419 }, { "epoch": 2.969325153374233, "grad_norm": 0.20527668297290802, "learning_rate": 4.844329717914947e-05, "loss": 0.81636643409729, "step": 2420 }, { "epoch": 2.970552147239264, "grad_norm": 0.19548161327838898, "learning_rate": 4.844155325170926e-05, "loss": 0.7467626333236694, "step": 2421 }, { "epoch": 2.9717791411042946, "grad_norm": 0.20499254763126373, "learning_rate": 4.843980837940689e-05, "loss": 0.7653746008872986, "step": 2422 }, { "epoch": 2.9730061349693253, "grad_norm": 0.20589032769203186, "learning_rate": 4.8438062562312704e-05, "loss": 0.789774477481842, "step": 2423 }, { "epoch": 2.974233128834356, "grad_norm": 0.17341703176498413, "learning_rate": 4.8436315800497055e-05, "loss": 0.8651480078697205, "step": 2424 }, { "epoch": 2.9754601226993866, "grad_norm": 0.17126064002513885, "learning_rate": 4.8434568094030365e-05, "loss": 0.9081021547317505, "step": 2425 }, { "epoch": 2.9766871165644173, "grad_norm": 0.22224067151546478, "learning_rate": 4.843281944298307e-05, "loss": 0.891389012336731, "step": 2426 }, { "epoch": 2.977914110429448, "grad_norm": 0.19236508011817932, "learning_rate": 4.843106984742566e-05, "loss": 0.7622532844543457, "step": 2427 }, { "epoch": 2.9791411042944786, "grad_norm": 0.17860658466815948, "learning_rate": 4.842931930742865e-05, "loss": 0.8308079242706299, "step": 2428 }, { "epoch": 2.9803680981595093, "grad_norm": 0.212263822555542, "learning_rate": 4.8427567823062606e-05, "loss": 0.8808709383010864, "step": 2429 }, { "epoch": 2.98159509202454, "grad_norm": 0.18718744814395905, "learning_rate": 4.842581539439811e-05, "loss": 0.8904514312744141, "step": 2430 }, { "epoch": 2.9828220858895707, "grad_norm": 0.22643092274665833, "learning_rate": 4.842406202150581e-05, "loss": 0.7701482772827148, "step": 2431 }, { "epoch": 2.9840490797546013, "grad_norm": 0.20476804673671722, "learning_rate": 4.842230770445638e-05, "loss": 0.8100928664207458, "step": 2432 }, { "epoch": 2.985276073619632, "grad_norm": 0.21595726907253265, "learning_rate": 4.8420552443320535e-05, "loss": 0.8893564939498901, "step": 2433 }, { "epoch": 2.9865030674846627, "grad_norm": 0.210011288523674, "learning_rate": 4.841879623816901e-05, "loss": 0.7951527833938599, "step": 2434 }, { "epoch": 2.9877300613496933, "grad_norm": 0.19779559969902039, "learning_rate": 4.84170390890726e-05, "loss": 0.8990136384963989, "step": 2435 }, { "epoch": 2.988957055214724, "grad_norm": 0.24682684242725372, "learning_rate": 4.841528099610214e-05, "loss": 0.626247763633728, "step": 2436 }, { "epoch": 2.9901840490797547, "grad_norm": 0.17092661559581757, "learning_rate": 4.8413521959328487e-05, "loss": 1.0160741806030273, "step": 2437 }, { "epoch": 2.9914110429447853, "grad_norm": 0.20836254954338074, "learning_rate": 4.8411761978822535e-05, "loss": 0.8194328546524048, "step": 2438 }, { "epoch": 2.992638036809816, "grad_norm": 0.20119605958461761, "learning_rate": 4.841000105465523e-05, "loss": 0.8119667768478394, "step": 2439 }, { "epoch": 2.9938650306748467, "grad_norm": 0.20560839772224426, "learning_rate": 4.8408239186897544e-05, "loss": 0.8051453828811646, "step": 2440 }, { "epoch": 2.9950920245398773, "grad_norm": 0.1775335669517517, "learning_rate": 4.8406476375620514e-05, "loss": 0.9071564674377441, "step": 2441 }, { "epoch": 2.996319018404908, "grad_norm": 0.21730849146842957, "learning_rate": 4.840471262089517e-05, "loss": 0.6683924198150635, "step": 2442 }, { "epoch": 2.9975460122699387, "grad_norm": 0.22096973657608032, "learning_rate": 4.840294792279261e-05, "loss": 0.823804497718811, "step": 2443 }, { "epoch": 2.9987730061349693, "grad_norm": 0.1792891025543213, "learning_rate": 4.840118228138397e-05, "loss": 0.7724466323852539, "step": 2444 }, { "epoch": 3.0, "grad_norm": 0.2313765436410904, "learning_rate": 4.839941569674041e-05, "loss": 0.7456013560295105, "step": 2445 }, { "epoch": 3.0012269938650307, "grad_norm": 0.23136721551418304, "learning_rate": 4.839764816893315e-05, "loss": 0.9125158786773682, "step": 2446 }, { "epoch": 3.0024539877300613, "grad_norm": 0.2240671068429947, "learning_rate": 4.839587969803341e-05, "loss": 0.8873602151870728, "step": 2447 }, { "epoch": 3.003680981595092, "grad_norm": 0.2689926326274872, "learning_rate": 4.83941102841125e-05, "loss": 0.726958692073822, "step": 2448 }, { "epoch": 3.0049079754601227, "grad_norm": 0.1935441493988037, "learning_rate": 4.8392339927241717e-05, "loss": 0.9272753000259399, "step": 2449 }, { "epoch": 3.0061349693251533, "grad_norm": 0.2098952829837799, "learning_rate": 4.839056862749244e-05, "loss": 0.9021774530410767, "step": 2450 }, { "epoch": 3.007361963190184, "grad_norm": 0.1960432380437851, "learning_rate": 4.838879638493604e-05, "loss": 0.9927902817726135, "step": 2451 }, { "epoch": 3.0085889570552147, "grad_norm": 0.22050638496875763, "learning_rate": 4.838702319964398e-05, "loss": 0.8255438804626465, "step": 2452 }, { "epoch": 3.0098159509202453, "grad_norm": 0.23369519412517548, "learning_rate": 4.83852490716877e-05, "loss": 0.7029317617416382, "step": 2453 }, { "epoch": 3.011042944785276, "grad_norm": 0.21131989359855652, "learning_rate": 4.838347400113874e-05, "loss": 0.8310978412628174, "step": 2454 }, { "epoch": 3.0122699386503067, "grad_norm": 0.18781746923923492, "learning_rate": 4.838169798806863e-05, "loss": 0.9648561477661133, "step": 2455 }, { "epoch": 3.0134969325153373, "grad_norm": 0.19265806674957275, "learning_rate": 4.837992103254896e-05, "loss": 0.7936848402023315, "step": 2456 }, { "epoch": 3.014723926380368, "grad_norm": 0.2037212997674942, "learning_rate": 4.837814313465135e-05, "loss": 0.8629237413406372, "step": 2457 }, { "epoch": 3.0159509202453987, "grad_norm": 0.22378022968769073, "learning_rate": 4.837636429444747e-05, "loss": 0.8194590210914612, "step": 2458 }, { "epoch": 3.0171779141104293, "grad_norm": 0.2423987239599228, "learning_rate": 4.837458451200903e-05, "loss": 0.8910950422286987, "step": 2459 }, { "epoch": 3.01840490797546, "grad_norm": 0.20275750756263733, "learning_rate": 4.837280378740774e-05, "loss": 0.8421240448951721, "step": 2460 }, { "epoch": 3.0196319018404907, "grad_norm": 0.20557506382465363, "learning_rate": 4.837102212071539e-05, "loss": 0.8759689331054688, "step": 2461 }, { "epoch": 3.0208588957055214, "grad_norm": 0.18264919519424438, "learning_rate": 4.836923951200381e-05, "loss": 0.9511744976043701, "step": 2462 }, { "epoch": 3.022085889570552, "grad_norm": 0.2161438763141632, "learning_rate": 4.836745596134482e-05, "loss": 0.7121752500534058, "step": 2463 }, { "epoch": 3.0233128834355827, "grad_norm": 0.19644905626773834, "learning_rate": 4.8365671468810325e-05, "loss": 0.9437732100486755, "step": 2464 }, { "epoch": 3.0245398773006134, "grad_norm": 0.1828211098909378, "learning_rate": 4.8363886034472264e-05, "loss": 0.8361877202987671, "step": 2465 }, { "epoch": 3.025766871165644, "grad_norm": 0.2122402936220169, "learning_rate": 4.836209965840259e-05, "loss": 0.6321042776107788, "step": 2466 }, { "epoch": 3.0269938650306747, "grad_norm": 0.24584083259105682, "learning_rate": 4.836031234067331e-05, "loss": 0.9407027959823608, "step": 2467 }, { "epoch": 3.0282208588957054, "grad_norm": 0.18748658895492554, "learning_rate": 4.835852408135646e-05, "loss": 0.8988504409790039, "step": 2468 }, { "epoch": 3.029447852760736, "grad_norm": 0.19516298174858093, "learning_rate": 4.835673488052413e-05, "loss": 0.8675650358200073, "step": 2469 }, { "epoch": 3.0306748466257667, "grad_norm": 0.16651201248168945, "learning_rate": 4.835494473824843e-05, "loss": 0.8705756664276123, "step": 2470 }, { "epoch": 3.0319018404907974, "grad_norm": 0.2145835906267166, "learning_rate": 4.8353153654601523e-05, "loss": 0.9743080139160156, "step": 2471 }, { "epoch": 3.033128834355828, "grad_norm": 0.19211581349372864, "learning_rate": 4.8351361629655595e-05, "loss": 0.7162221670150757, "step": 2472 }, { "epoch": 3.0343558282208587, "grad_norm": 0.202417254447937, "learning_rate": 4.834956866348288e-05, "loss": 0.8591035604476929, "step": 2473 }, { "epoch": 3.0355828220858894, "grad_norm": 0.23525942862033844, "learning_rate": 4.834777475615564e-05, "loss": 0.8753349781036377, "step": 2474 }, { "epoch": 3.03680981595092, "grad_norm": 0.21886558830738068, "learning_rate": 4.83459799077462e-05, "loss": 0.8357034921646118, "step": 2475 }, { "epoch": 3.038036809815951, "grad_norm": 0.18057861924171448, "learning_rate": 4.834418411832689e-05, "loss": 0.9199501276016235, "step": 2476 }, { "epoch": 3.039263803680982, "grad_norm": 0.25274601578712463, "learning_rate": 4.83423873879701e-05, "loss": 0.8443843126296997, "step": 2477 }, { "epoch": 3.0404907975460125, "grad_norm": 0.18128377199172974, "learning_rate": 4.834058971674825e-05, "loss": 0.8738611936569214, "step": 2478 }, { "epoch": 3.041717791411043, "grad_norm": 0.18694832921028137, "learning_rate": 4.8338791104733795e-05, "loss": 0.7910598516464233, "step": 2479 }, { "epoch": 3.042944785276074, "grad_norm": 0.19539020955562592, "learning_rate": 4.833699155199923e-05, "loss": 0.8604792356491089, "step": 2480 }, { "epoch": 3.0441717791411045, "grad_norm": 0.2120741903781891, "learning_rate": 4.83351910586171e-05, "loss": 0.7954474091529846, "step": 2481 }, { "epoch": 3.045398773006135, "grad_norm": 0.21529847383499146, "learning_rate": 4.8333389624659975e-05, "loss": 0.8265784978866577, "step": 2482 }, { "epoch": 3.046625766871166, "grad_norm": 0.22524625062942505, "learning_rate": 4.833158725020046e-05, "loss": 0.9817146062850952, "step": 2483 }, { "epoch": 3.0478527607361965, "grad_norm": 0.1692521572113037, "learning_rate": 4.832978393531121e-05, "loss": 1.0033811330795288, "step": 2484 }, { "epoch": 3.049079754601227, "grad_norm": 0.19200658798217773, "learning_rate": 4.83279796800649e-05, "loss": 0.9177238941192627, "step": 2485 }, { "epoch": 3.050306748466258, "grad_norm": 0.1592865288257599, "learning_rate": 4.832617448453427e-05, "loss": 0.8145633339881897, "step": 2486 }, { "epoch": 3.0515337423312885, "grad_norm": 0.1806468963623047, "learning_rate": 4.832436834879207e-05, "loss": 0.7117313742637634, "step": 2487 }, { "epoch": 3.052760736196319, "grad_norm": 0.23279601335525513, "learning_rate": 4.832256127291111e-05, "loss": 0.7265002727508545, "step": 2488 }, { "epoch": 3.05398773006135, "grad_norm": 0.19345329701900482, "learning_rate": 4.832075325696423e-05, "loss": 0.9046744108200073, "step": 2489 }, { "epoch": 3.0552147239263805, "grad_norm": 0.19717605412006378, "learning_rate": 4.8318944301024284e-05, "loss": 0.8902974724769592, "step": 2490 }, { "epoch": 3.056441717791411, "grad_norm": 0.1485026329755783, "learning_rate": 4.83171344051642e-05, "loss": 0.8626710176467896, "step": 2491 }, { "epoch": 3.057668711656442, "grad_norm": 0.22970280051231384, "learning_rate": 4.831532356945694e-05, "loss": 0.6816619634628296, "step": 2492 }, { "epoch": 3.0588957055214725, "grad_norm": 0.1883392333984375, "learning_rate": 4.831351179397549e-05, "loss": 0.8426380157470703, "step": 2493 }, { "epoch": 3.060122699386503, "grad_norm": 0.2079700082540512, "learning_rate": 4.831169907879286e-05, "loss": 0.8493733406066895, "step": 2494 }, { "epoch": 3.061349693251534, "grad_norm": 0.29136577248573303, "learning_rate": 4.830988542398213e-05, "loss": 0.7590813636779785, "step": 2495 }, { "epoch": 3.0625766871165645, "grad_norm": 0.19628466665744781, "learning_rate": 4.83080708296164e-05, "loss": 0.9015249013900757, "step": 2496 }, { "epoch": 3.063803680981595, "grad_norm": 0.24333401024341583, "learning_rate": 4.8306255295768807e-05, "loss": 0.7671120166778564, "step": 2497 }, { "epoch": 3.065030674846626, "grad_norm": 0.24694447219371796, "learning_rate": 4.8304438822512545e-05, "loss": 0.7178539037704468, "step": 2498 }, { "epoch": 3.0662576687116565, "grad_norm": 0.20077498257160187, "learning_rate": 4.830262140992082e-05, "loss": 0.9080960750579834, "step": 2499 }, { "epoch": 3.067484662576687, "grad_norm": 0.1857178807258606, "learning_rate": 4.830080305806688e-05, "loss": 0.8830072283744812, "step": 2500 }, { "epoch": 3.068711656441718, "grad_norm": 0.19991706311702728, "learning_rate": 4.829898376702403e-05, "loss": 0.7953039407730103, "step": 2501 }, { "epoch": 3.0699386503067485, "grad_norm": 0.24294327199459076, "learning_rate": 4.8297163536865584e-05, "loss": 0.5701331496238708, "step": 2502 }, { "epoch": 3.071165644171779, "grad_norm": 0.210518017411232, "learning_rate": 4.829534236766494e-05, "loss": 0.762244462966919, "step": 2503 }, { "epoch": 3.07239263803681, "grad_norm": 0.21915341913700104, "learning_rate": 4.829352025949547e-05, "loss": 0.7864232063293457, "step": 2504 }, { "epoch": 3.0736196319018405, "grad_norm": 0.24288903176784515, "learning_rate": 4.829169721243064e-05, "loss": 0.8091459274291992, "step": 2505 }, { "epoch": 3.074846625766871, "grad_norm": 0.20091409981250763, "learning_rate": 4.828987322654392e-05, "loss": 0.9078545570373535, "step": 2506 }, { "epoch": 3.076073619631902, "grad_norm": 0.2356826812028885, "learning_rate": 4.828804830190884e-05, "loss": 0.8345551490783691, "step": 2507 }, { "epoch": 3.0773006134969325, "grad_norm": 0.20131583511829376, "learning_rate": 4.828622243859896e-05, "loss": 0.9775334596633911, "step": 2508 }, { "epoch": 3.078527607361963, "grad_norm": 0.2284579873085022, "learning_rate": 4.8284395636687854e-05, "loss": 0.8412786722183228, "step": 2509 }, { "epoch": 3.079754601226994, "grad_norm": 0.19789013266563416, "learning_rate": 4.8282567896249174e-05, "loss": 0.8620169162750244, "step": 2510 }, { "epoch": 3.0809815950920245, "grad_norm": 0.2271234095096588, "learning_rate": 4.828073921735659e-05, "loss": 0.6765434741973877, "step": 2511 }, { "epoch": 3.082208588957055, "grad_norm": 0.1727326214313507, "learning_rate": 4.8278909600083806e-05, "loss": 0.8249396085739136, "step": 2512 }, { "epoch": 3.083435582822086, "grad_norm": 0.23738177120685577, "learning_rate": 4.827707904450457e-05, "loss": 0.847784161567688, "step": 2513 }, { "epoch": 3.0846625766871165, "grad_norm": 0.19503331184387207, "learning_rate": 4.827524755069266e-05, "loss": 0.7218028903007507, "step": 2514 }, { "epoch": 3.085889570552147, "grad_norm": 0.21843723952770233, "learning_rate": 4.827341511872191e-05, "loss": 0.6872543096542358, "step": 2515 }, { "epoch": 3.087116564417178, "grad_norm": 0.19146235287189484, "learning_rate": 4.827158174866617e-05, "loss": 0.8804225921630859, "step": 2516 }, { "epoch": 3.0883435582822085, "grad_norm": 0.2167656272649765, "learning_rate": 4.8269747440599344e-05, "loss": 0.7898445129394531, "step": 2517 }, { "epoch": 3.089570552147239, "grad_norm": 0.20603497326374054, "learning_rate": 4.826791219459537e-05, "loss": 0.7701493501663208, "step": 2518 }, { "epoch": 3.09079754601227, "grad_norm": 0.20212122797966003, "learning_rate": 4.826607601072821e-05, "loss": 0.8050695657730103, "step": 2519 }, { "epoch": 3.0920245398773005, "grad_norm": 0.19543804228305817, "learning_rate": 4.826423888907189e-05, "loss": 0.6723787784576416, "step": 2520 }, { "epoch": 3.093251533742331, "grad_norm": 0.21179142594337463, "learning_rate": 4.826240082970045e-05, "loss": 0.7587571740150452, "step": 2521 }, { "epoch": 3.094478527607362, "grad_norm": 0.26298657059669495, "learning_rate": 4.826056183268798e-05, "loss": 0.5821501016616821, "step": 2522 }, { "epoch": 3.0957055214723925, "grad_norm": 0.18777908384799957, "learning_rate": 4.8258721898108604e-05, "loss": 0.7918263673782349, "step": 2523 }, { "epoch": 3.096932515337423, "grad_norm": 0.2044987678527832, "learning_rate": 4.825688102603648e-05, "loss": 1.0261898040771484, "step": 2524 }, { "epoch": 3.098159509202454, "grad_norm": 0.18249180912971497, "learning_rate": 4.8255039216545814e-05, "loss": 0.9120903015136719, "step": 2525 }, { "epoch": 3.0993865030674845, "grad_norm": 0.241263747215271, "learning_rate": 4.825319646971085e-05, "loss": 0.6870057582855225, "step": 2526 }, { "epoch": 3.100613496932515, "grad_norm": 0.21870394051074982, "learning_rate": 4.825135278560584e-05, "loss": 0.8351167440414429, "step": 2527 }, { "epoch": 3.101840490797546, "grad_norm": 0.21839629113674164, "learning_rate": 4.824950816430513e-05, "loss": 0.7339568138122559, "step": 2528 }, { "epoch": 3.1030674846625765, "grad_norm": 0.23636192083358765, "learning_rate": 4.824766260588305e-05, "loss": 0.7834730744361877, "step": 2529 }, { "epoch": 3.104294478527607, "grad_norm": 0.22150738537311554, "learning_rate": 4.824581611041399e-05, "loss": 0.9770652055740356, "step": 2530 }, { "epoch": 3.105521472392638, "grad_norm": 0.1804201304912567, "learning_rate": 4.824396867797239e-05, "loss": 0.9991258382797241, "step": 2531 }, { "epoch": 3.1067484662576685, "grad_norm": 0.17382420599460602, "learning_rate": 4.82421203086327e-05, "loss": 0.996586263179779, "step": 2532 }, { "epoch": 3.107975460122699, "grad_norm": 0.18637587130069733, "learning_rate": 4.824027100246943e-05, "loss": 1.0403392314910889, "step": 2533 }, { "epoch": 3.10920245398773, "grad_norm": 0.2692403197288513, "learning_rate": 4.823842075955712e-05, "loss": 0.4825896620750427, "step": 2534 }, { "epoch": 3.1104294478527605, "grad_norm": 0.1885714828968048, "learning_rate": 4.823656957997035e-05, "loss": 0.9365739822387695, "step": 2535 }, { "epoch": 3.111656441717791, "grad_norm": 0.20166316628456116, "learning_rate": 4.8234717463783736e-05, "loss": 0.939531147480011, "step": 2536 }, { "epoch": 3.112883435582822, "grad_norm": 0.23865285515785217, "learning_rate": 4.823286441107192e-05, "loss": 0.8512439727783203, "step": 2537 }, { "epoch": 3.1141104294478525, "grad_norm": 0.21733315289020538, "learning_rate": 4.8231010421909605e-05, "loss": 0.852358341217041, "step": 2538 }, { "epoch": 3.1153374233128837, "grad_norm": 0.22198867797851562, "learning_rate": 4.822915549637152e-05, "loss": 0.8576400876045227, "step": 2539 }, { "epoch": 3.116564417177914, "grad_norm": 0.1727404147386551, "learning_rate": 4.822729963453243e-05, "loss": 0.7359638214111328, "step": 2540 }, { "epoch": 3.117791411042945, "grad_norm": 0.2170976996421814, "learning_rate": 4.822544283646713e-05, "loss": 0.7504830360412598, "step": 2541 }, { "epoch": 3.1190184049079757, "grad_norm": 0.22372442483901978, "learning_rate": 4.822358510225047e-05, "loss": 0.736099123954773, "step": 2542 }, { "epoch": 3.1202453987730063, "grad_norm": 0.19926370680332184, "learning_rate": 4.822172643195734e-05, "loss": 0.7426764369010925, "step": 2543 }, { "epoch": 3.121472392638037, "grad_norm": 0.17373526096343994, "learning_rate": 4.821986682566264e-05, "loss": 0.8818531036376953, "step": 2544 }, { "epoch": 3.1226993865030677, "grad_norm": 0.21371804177761078, "learning_rate": 4.8218006283441335e-05, "loss": 0.7917479276657104, "step": 2545 }, { "epoch": 3.1239263803680983, "grad_norm": 0.20293530821800232, "learning_rate": 4.821614480536842e-05, "loss": 0.7495874166488647, "step": 2546 }, { "epoch": 3.125153374233129, "grad_norm": 0.1664031744003296, "learning_rate": 4.821428239151892e-05, "loss": 0.8363022804260254, "step": 2547 }, { "epoch": 3.1263803680981597, "grad_norm": 0.251348614692688, "learning_rate": 4.8212419041967906e-05, "loss": 0.8079566955566406, "step": 2548 }, { "epoch": 3.1276073619631903, "grad_norm": 0.19406741857528687, "learning_rate": 4.821055475679048e-05, "loss": 0.9412163496017456, "step": 2549 }, { "epoch": 3.128834355828221, "grad_norm": 0.2658458650112152, "learning_rate": 4.82086895360618e-05, "loss": 0.80919349193573, "step": 2550 }, { "epoch": 3.1300613496932517, "grad_norm": 0.17668147385120392, "learning_rate": 4.820682337985703e-05, "loss": 0.8308618664741516, "step": 2551 }, { "epoch": 3.1312883435582823, "grad_norm": 0.226984441280365, "learning_rate": 4.82049562882514e-05, "loss": 0.7604982852935791, "step": 2552 }, { "epoch": 3.132515337423313, "grad_norm": 0.21327246725559235, "learning_rate": 4.820308826132016e-05, "loss": 0.7821786403656006, "step": 2553 }, { "epoch": 3.1337423312883437, "grad_norm": 0.22024209797382355, "learning_rate": 4.8201219299138624e-05, "loss": 0.7897942066192627, "step": 2554 }, { "epoch": 3.1349693251533743, "grad_norm": 0.2863866984844208, "learning_rate": 4.8199349401782095e-05, "loss": 0.7321205735206604, "step": 2555 }, { "epoch": 3.136196319018405, "grad_norm": 0.24209001660346985, "learning_rate": 4.819747856932596e-05, "loss": 0.8151949048042297, "step": 2556 }, { "epoch": 3.1374233128834357, "grad_norm": 0.25400006771087646, "learning_rate": 4.819560680184563e-05, "loss": 0.6177437901496887, "step": 2557 }, { "epoch": 3.1386503067484663, "grad_norm": 0.18606935441493988, "learning_rate": 4.819373409941654e-05, "loss": 0.9972411394119263, "step": 2558 }, { "epoch": 3.139877300613497, "grad_norm": 0.19575448334217072, "learning_rate": 4.819186046211419e-05, "loss": 0.8485220670700073, "step": 2559 }, { "epoch": 3.1411042944785277, "grad_norm": 0.2301051914691925, "learning_rate": 4.8189985890014086e-05, "loss": 0.8860222101211548, "step": 2560 }, { "epoch": 3.1423312883435583, "grad_norm": 0.20998762547969818, "learning_rate": 4.8188110383191784e-05, "loss": 0.8571518659591675, "step": 2561 }, { "epoch": 3.143558282208589, "grad_norm": 0.20286022126674652, "learning_rate": 4.818623394172289e-05, "loss": 0.9173754453659058, "step": 2562 }, { "epoch": 3.1447852760736197, "grad_norm": 0.2251025289297104, "learning_rate": 4.818435656568304e-05, "loss": 0.808631420135498, "step": 2563 }, { "epoch": 3.1460122699386504, "grad_norm": 0.2362525761127472, "learning_rate": 4.8182478255147894e-05, "loss": 0.7619195580482483, "step": 2564 }, { "epoch": 3.147239263803681, "grad_norm": 0.22170941531658173, "learning_rate": 4.8180599010193176e-05, "loss": 0.7556285858154297, "step": 2565 }, { "epoch": 3.1484662576687117, "grad_norm": 0.21077515184879303, "learning_rate": 4.8178718830894614e-05, "loss": 0.8413832187652588, "step": 2566 }, { "epoch": 3.1496932515337424, "grad_norm": 0.20096607506275177, "learning_rate": 4.817683771732801e-05, "loss": 0.8501499891281128, "step": 2567 }, { "epoch": 3.150920245398773, "grad_norm": 0.26718026399612427, "learning_rate": 4.817495566956918e-05, "loss": 0.724471926689148, "step": 2568 }, { "epoch": 3.1521472392638037, "grad_norm": 0.2394062727689743, "learning_rate": 4.817307268769398e-05, "loss": 0.8381637334823608, "step": 2569 }, { "epoch": 3.1533742331288344, "grad_norm": 0.2180698812007904, "learning_rate": 4.8171188771778316e-05, "loss": 0.8174648284912109, "step": 2570 }, { "epoch": 3.154601226993865, "grad_norm": 0.20485053956508636, "learning_rate": 4.8169303921898125e-05, "loss": 0.7907076478004456, "step": 2571 }, { "epoch": 3.1558282208588957, "grad_norm": 0.18250785768032074, "learning_rate": 4.816741813812936e-05, "loss": 0.8278086185455322, "step": 2572 }, { "epoch": 3.1570552147239264, "grad_norm": 0.2963041365146637, "learning_rate": 4.816553142054805e-05, "loss": 0.6915886402130127, "step": 2573 }, { "epoch": 3.158282208588957, "grad_norm": 0.22184424102306366, "learning_rate": 4.816364376923025e-05, "loss": 0.8667279481887817, "step": 2574 }, { "epoch": 3.1595092024539877, "grad_norm": 0.1963621973991394, "learning_rate": 4.816175518425202e-05, "loss": 0.7836471796035767, "step": 2575 }, { "epoch": 3.1607361963190184, "grad_norm": 0.20668338239192963, "learning_rate": 4.8159865665689506e-05, "loss": 0.8552612662315369, "step": 2576 }, { "epoch": 3.161963190184049, "grad_norm": 0.22604842483997345, "learning_rate": 4.815797521361886e-05, "loss": 0.7633973360061646, "step": 2577 }, { "epoch": 3.1631901840490797, "grad_norm": 0.30276426672935486, "learning_rate": 4.815608382811628e-05, "loss": 0.6574289798736572, "step": 2578 }, { "epoch": 3.1644171779141104, "grad_norm": 0.18501850962638855, "learning_rate": 4.8154191509258e-05, "loss": 0.8175626993179321, "step": 2579 }, { "epoch": 3.165644171779141, "grad_norm": 0.21730637550354004, "learning_rate": 4.81522982571203e-05, "loss": 0.6922441720962524, "step": 2580 }, { "epoch": 3.1668711656441717, "grad_norm": 0.19876030087471008, "learning_rate": 4.81504040717795e-05, "loss": 0.7225267887115479, "step": 2581 }, { "epoch": 3.1680981595092024, "grad_norm": 0.20521222054958344, "learning_rate": 4.814850895331192e-05, "loss": 0.7990577816963196, "step": 2582 }, { "epoch": 3.169325153374233, "grad_norm": 0.23005908727645874, "learning_rate": 4.814661290179398e-05, "loss": 0.8793095350265503, "step": 2583 }, { "epoch": 3.1705521472392637, "grad_norm": 0.2297876626253128, "learning_rate": 4.814471591730208e-05, "loss": 0.9376273155212402, "step": 2584 }, { "epoch": 3.1717791411042944, "grad_norm": 0.20415811240673065, "learning_rate": 4.814281799991271e-05, "loss": 0.7897652387619019, "step": 2585 }, { "epoch": 3.173006134969325, "grad_norm": 0.22282205522060394, "learning_rate": 4.814091914970234e-05, "loss": 0.8221826553344727, "step": 2586 }, { "epoch": 3.1742331288343557, "grad_norm": 0.19682158529758453, "learning_rate": 4.813901936674752e-05, "loss": 0.8292348384857178, "step": 2587 }, { "epoch": 3.1754601226993864, "grad_norm": 0.25028181076049805, "learning_rate": 4.8137118651124824e-05, "loss": 0.7349518537521362, "step": 2588 }, { "epoch": 3.176687116564417, "grad_norm": 0.18455909192562103, "learning_rate": 4.813521700291086e-05, "loss": 0.955096960067749, "step": 2589 }, { "epoch": 3.1779141104294477, "grad_norm": 0.22652797400951385, "learning_rate": 4.813331442218228e-05, "loss": 0.8956960439682007, "step": 2590 }, { "epoch": 3.1791411042944784, "grad_norm": 0.2709456980228424, "learning_rate": 4.813141090901578e-05, "loss": 0.7584890723228455, "step": 2591 }, { "epoch": 3.180368098159509, "grad_norm": 0.18335096538066864, "learning_rate": 4.8129506463488084e-05, "loss": 0.8544306755065918, "step": 2592 }, { "epoch": 3.1815950920245397, "grad_norm": 0.19247117638587952, "learning_rate": 4.8127601085675945e-05, "loss": 0.8085331320762634, "step": 2593 }, { "epoch": 3.1828220858895704, "grad_norm": 0.24170352518558502, "learning_rate": 4.8125694775656174e-05, "loss": 0.8032072186470032, "step": 2594 }, { "epoch": 3.184049079754601, "grad_norm": 0.18164587020874023, "learning_rate": 4.812378753350559e-05, "loss": 0.8523681163787842, "step": 2595 }, { "epoch": 3.1852760736196317, "grad_norm": 0.22759433090686798, "learning_rate": 4.812187935930109e-05, "loss": 0.9172079563140869, "step": 2596 }, { "epoch": 3.1865030674846624, "grad_norm": 0.2679084539413452, "learning_rate": 4.8119970253119585e-05, "loss": 0.7363642454147339, "step": 2597 }, { "epoch": 3.187730061349693, "grad_norm": 0.24303090572357178, "learning_rate": 4.811806021503801e-05, "loss": 0.7257018089294434, "step": 2598 }, { "epoch": 3.1889570552147237, "grad_norm": 0.20925243198871613, "learning_rate": 4.811614924513336e-05, "loss": 0.8869308233261108, "step": 2599 }, { "epoch": 3.190184049079755, "grad_norm": 0.22847241163253784, "learning_rate": 4.811423734348267e-05, "loss": 0.6915886402130127, "step": 2600 }, { "epoch": 3.191411042944785, "grad_norm": 0.17009517550468445, "learning_rate": 4.811232451016299e-05, "loss": 0.8924528360366821, "step": 2601 }, { "epoch": 3.192638036809816, "grad_norm": 0.222347691655159, "learning_rate": 4.8110410745251435e-05, "loss": 0.8890141248703003, "step": 2602 }, { "epoch": 3.1938650306748464, "grad_norm": 0.2554726302623749, "learning_rate": 4.810849604882514e-05, "loss": 0.5917946100234985, "step": 2603 }, { "epoch": 3.1950920245398775, "grad_norm": 0.2330615222454071, "learning_rate": 4.810658042096126e-05, "loss": 0.9625523090362549, "step": 2604 }, { "epoch": 3.196319018404908, "grad_norm": 0.22099195420742035, "learning_rate": 4.810466386173704e-05, "loss": 0.8642259836196899, "step": 2605 }, { "epoch": 3.197546012269939, "grad_norm": 0.18924465775489807, "learning_rate": 4.810274637122971e-05, "loss": 0.9261512160301208, "step": 2606 }, { "epoch": 3.1987730061349695, "grad_norm": 0.21399010717868805, "learning_rate": 4.810082794951656e-05, "loss": 0.7626092433929443, "step": 2607 }, { "epoch": 3.2, "grad_norm": 0.2147710919380188, "learning_rate": 4.809890859667493e-05, "loss": 0.8998029232025146, "step": 2608 }, { "epoch": 3.201226993865031, "grad_norm": 0.18023547530174255, "learning_rate": 4.8096988312782174e-05, "loss": 0.8728809952735901, "step": 2609 }, { "epoch": 3.2024539877300615, "grad_norm": 0.2017984241247177, "learning_rate": 4.809506709791569e-05, "loss": 0.7189891338348389, "step": 2610 }, { "epoch": 3.203680981595092, "grad_norm": 0.22939005494117737, "learning_rate": 4.809314495215292e-05, "loss": 0.9575088024139404, "step": 2611 }, { "epoch": 3.204907975460123, "grad_norm": 0.20589090883731842, "learning_rate": 4.809122187557134e-05, "loss": 0.761506199836731, "step": 2612 }, { "epoch": 3.2061349693251535, "grad_norm": 0.1847018450498581, "learning_rate": 4.808929786824847e-05, "loss": 0.8111155033111572, "step": 2613 }, { "epoch": 3.207361963190184, "grad_norm": 0.19023087620735168, "learning_rate": 4.8087372930261856e-05, "loss": 0.9158420562744141, "step": 2614 }, { "epoch": 3.208588957055215, "grad_norm": 0.24115914106369019, "learning_rate": 4.8085447061689085e-05, "loss": 0.935409665107727, "step": 2615 }, { "epoch": 3.2098159509202455, "grad_norm": 0.24973639845848083, "learning_rate": 4.8083520262607775e-05, "loss": 0.6444511413574219, "step": 2616 }, { "epoch": 3.211042944785276, "grad_norm": 0.2161182016134262, "learning_rate": 4.808159253309561e-05, "loss": 0.7447251677513123, "step": 2617 }, { "epoch": 3.212269938650307, "grad_norm": 0.1733931452035904, "learning_rate": 4.807966387323027e-05, "loss": 0.9881627559661865, "step": 2618 }, { "epoch": 3.2134969325153375, "grad_norm": 0.18464155495166779, "learning_rate": 4.807773428308951e-05, "loss": 0.840694010257721, "step": 2619 }, { "epoch": 3.214723926380368, "grad_norm": 0.23027491569519043, "learning_rate": 4.807580376275111e-05, "loss": 0.8051727414131165, "step": 2620 }, { "epoch": 3.215950920245399, "grad_norm": 0.19767925143241882, "learning_rate": 4.8073872312292866e-05, "loss": 0.8186767101287842, "step": 2621 }, { "epoch": 3.2171779141104295, "grad_norm": 0.19804343581199646, "learning_rate": 4.8071939931792644e-05, "loss": 0.8404213190078735, "step": 2622 }, { "epoch": 3.21840490797546, "grad_norm": 0.17209939658641815, "learning_rate": 4.807000662132832e-05, "loss": 0.8609628677368164, "step": 2623 }, { "epoch": 3.219631901840491, "grad_norm": 0.17859746515750885, "learning_rate": 4.806807238097783e-05, "loss": 0.8645005226135254, "step": 2624 }, { "epoch": 3.2208588957055215, "grad_norm": 0.20394480228424072, "learning_rate": 4.8066137210819126e-05, "loss": 0.7318133115768433, "step": 2625 }, { "epoch": 3.222085889570552, "grad_norm": 0.2177344262599945, "learning_rate": 4.806420111093023e-05, "loss": 0.9102264642715454, "step": 2626 }, { "epoch": 3.223312883435583, "grad_norm": 0.21555253863334656, "learning_rate": 4.8062264081389164e-05, "loss": 0.9589451551437378, "step": 2627 }, { "epoch": 3.2245398773006135, "grad_norm": 0.2104564607143402, "learning_rate": 4.8060326122274006e-05, "loss": 0.7577084302902222, "step": 2628 }, { "epoch": 3.225766871165644, "grad_norm": 0.2522529065608978, "learning_rate": 4.805838723366288e-05, "loss": 0.6356327533721924, "step": 2629 }, { "epoch": 3.226993865030675, "grad_norm": 0.18915851414203644, "learning_rate": 4.805644741563392e-05, "loss": 0.781217634677887, "step": 2630 }, { "epoch": 3.2282208588957055, "grad_norm": 0.21668507158756256, "learning_rate": 4.8054506668265324e-05, "loss": 0.8870527148246765, "step": 2631 }, { "epoch": 3.229447852760736, "grad_norm": 0.22914451360702515, "learning_rate": 4.8052564991635325e-05, "loss": 0.763473629951477, "step": 2632 }, { "epoch": 3.230674846625767, "grad_norm": 0.15986905992031097, "learning_rate": 4.8050622385822176e-05, "loss": 1.0380555391311646, "step": 2633 }, { "epoch": 3.2319018404907975, "grad_norm": 0.2175728976726532, "learning_rate": 4.804867885090418e-05, "loss": 0.9332126379013062, "step": 2634 }, { "epoch": 3.233128834355828, "grad_norm": 0.19476434588432312, "learning_rate": 4.804673438695967e-05, "loss": 0.8290810585021973, "step": 2635 }, { "epoch": 3.234355828220859, "grad_norm": 0.21672844886779785, "learning_rate": 4.804478899406704e-05, "loss": 0.867931604385376, "step": 2636 }, { "epoch": 3.2355828220858895, "grad_norm": 0.1981469988822937, "learning_rate": 4.804284267230468e-05, "loss": 0.7955286502838135, "step": 2637 }, { "epoch": 3.23680981595092, "grad_norm": 0.1807359904050827, "learning_rate": 4.804089542175106e-05, "loss": 0.759442925453186, "step": 2638 }, { "epoch": 3.238036809815951, "grad_norm": 0.21859531104564667, "learning_rate": 4.803894724248466e-05, "loss": 1.05955970287323, "step": 2639 }, { "epoch": 3.2392638036809815, "grad_norm": 0.19152292609214783, "learning_rate": 4.8036998134584e-05, "loss": 0.7738809585571289, "step": 2640 }, { "epoch": 3.240490797546012, "grad_norm": 0.19899970293045044, "learning_rate": 4.803504809812766e-05, "loss": 0.8978227972984314, "step": 2641 }, { "epoch": 3.241717791411043, "grad_norm": 0.2433260977268219, "learning_rate": 4.803309713319422e-05, "loss": 0.7026435136795044, "step": 2642 }, { "epoch": 3.2429447852760735, "grad_norm": 0.22778038680553436, "learning_rate": 4.803114523986233e-05, "loss": 0.8681850433349609, "step": 2643 }, { "epoch": 3.244171779141104, "grad_norm": 0.20613905787467957, "learning_rate": 4.802919241821066e-05, "loss": 0.7402520179748535, "step": 2644 }, { "epoch": 3.245398773006135, "grad_norm": 0.18895027041435242, "learning_rate": 4.802723866831793e-05, "loss": 0.7751574516296387, "step": 2645 }, { "epoch": 3.2466257668711656, "grad_norm": 0.1951705664396286, "learning_rate": 4.802528399026288e-05, "loss": 0.7765336036682129, "step": 2646 }, { "epoch": 3.247852760736196, "grad_norm": 0.20967519283294678, "learning_rate": 4.80233283841243e-05, "loss": 0.8001947999000549, "step": 2647 }, { "epoch": 3.249079754601227, "grad_norm": 0.2502484619617462, "learning_rate": 4.802137184998102e-05, "loss": 0.7834466695785522, "step": 2648 }, { "epoch": 3.2503067484662576, "grad_norm": 0.2437153458595276, "learning_rate": 4.8019414387911906e-05, "loss": 0.9803346395492554, "step": 2649 }, { "epoch": 3.2515337423312882, "grad_norm": 0.21471720933914185, "learning_rate": 4.8017455997995844e-05, "loss": 0.8008378744125366, "step": 2650 }, { "epoch": 3.252760736196319, "grad_norm": 0.2498689591884613, "learning_rate": 4.8015496680311774e-05, "loss": 0.701156497001648, "step": 2651 }, { "epoch": 3.2539877300613496, "grad_norm": 0.21851713955402374, "learning_rate": 4.801353643493869e-05, "loss": 0.7433645129203796, "step": 2652 }, { "epoch": 3.2552147239263802, "grad_norm": 0.2948942482471466, "learning_rate": 4.8011575261955575e-05, "loss": 0.770165205001831, "step": 2653 }, { "epoch": 3.256441717791411, "grad_norm": 0.2088811993598938, "learning_rate": 4.80096131614415e-05, "loss": 1.071920394897461, "step": 2654 }, { "epoch": 3.2576687116564416, "grad_norm": 0.21293030679225922, "learning_rate": 4.800765013347555e-05, "loss": 0.7616757154464722, "step": 2655 }, { "epoch": 3.2588957055214722, "grad_norm": 0.21667350828647614, "learning_rate": 4.800568617813683e-05, "loss": 0.9188454151153564, "step": 2656 }, { "epoch": 3.260122699386503, "grad_norm": 0.16677074134349823, "learning_rate": 4.800372129550452e-05, "loss": 0.9158051013946533, "step": 2657 }, { "epoch": 3.2613496932515336, "grad_norm": 0.20286686718463898, "learning_rate": 4.8001755485657815e-05, "loss": 0.7015013694763184, "step": 2658 }, { "epoch": 3.2625766871165642, "grad_norm": 0.20519526302814484, "learning_rate": 4.7999788748675943e-05, "loss": 0.8920719623565674, "step": 2659 }, { "epoch": 3.263803680981595, "grad_norm": 0.16634900867938995, "learning_rate": 4.7997821084638196e-05, "loss": 0.9753339290618896, "step": 2660 }, { "epoch": 3.265030674846626, "grad_norm": 0.28639712929725647, "learning_rate": 4.799585249362386e-05, "loss": 0.5274684429168701, "step": 2661 }, { "epoch": 3.2662576687116562, "grad_norm": 0.22865983843803406, "learning_rate": 4.79938829757123e-05, "loss": 0.8810487389564514, "step": 2662 }, { "epoch": 3.2674846625766873, "grad_norm": 0.20749054849147797, "learning_rate": 4.79919125309829e-05, "loss": 0.926489531993866, "step": 2663 }, { "epoch": 3.2687116564417176, "grad_norm": 0.23185019195079803, "learning_rate": 4.7989941159515086e-05, "loss": 0.6712583899497986, "step": 2664 }, { "epoch": 3.2699386503067487, "grad_norm": 0.24005573987960815, "learning_rate": 4.798796886138831e-05, "loss": 0.7405828237533569, "step": 2665 }, { "epoch": 3.271165644171779, "grad_norm": 0.1861313432455063, "learning_rate": 4.798599563668206e-05, "loss": 0.8958418369293213, "step": 2666 }, { "epoch": 3.27239263803681, "grad_norm": 0.1898956596851349, "learning_rate": 4.79840214854759e-05, "loss": 0.8533444404602051, "step": 2667 }, { "epoch": 3.2736196319018402, "grad_norm": 0.2509891092777252, "learning_rate": 4.798204640784938e-05, "loss": 0.7773140668869019, "step": 2668 }, { "epoch": 3.2748466257668714, "grad_norm": 0.20707912743091583, "learning_rate": 4.798007040388212e-05, "loss": 0.7730070948600769, "step": 2669 }, { "epoch": 3.276073619631902, "grad_norm": 0.21272745728492737, "learning_rate": 4.797809347365376e-05, "loss": 0.8668553233146667, "step": 2670 }, { "epoch": 3.2773006134969327, "grad_norm": 0.23868712782859802, "learning_rate": 4.7976115617243985e-05, "loss": 0.7776104211807251, "step": 2671 }, { "epoch": 3.2785276073619634, "grad_norm": 0.18894629180431366, "learning_rate": 4.797413683473252e-05, "loss": 0.816778838634491, "step": 2672 }, { "epoch": 3.279754601226994, "grad_norm": 0.21345782279968262, "learning_rate": 4.7972157126199124e-05, "loss": 0.83598393201828, "step": 2673 }, { "epoch": 3.2809815950920247, "grad_norm": 0.2608429789543152, "learning_rate": 4.7970176491723596e-05, "loss": 0.7136214971542358, "step": 2674 }, { "epoch": 3.2822085889570554, "grad_norm": 0.2396935373544693, "learning_rate": 4.796819493138577e-05, "loss": 0.7118529081344604, "step": 2675 }, { "epoch": 3.283435582822086, "grad_norm": 0.2127692699432373, "learning_rate": 4.79662124452655e-05, "loss": 0.8512159585952759, "step": 2676 }, { "epoch": 3.2846625766871167, "grad_norm": 0.21707141399383545, "learning_rate": 4.796422903344272e-05, "loss": 0.8610748052597046, "step": 2677 }, { "epoch": 3.2858895705521474, "grad_norm": 0.23797102272510529, "learning_rate": 4.796224469599736e-05, "loss": 0.7638229727745056, "step": 2678 }, { "epoch": 3.287116564417178, "grad_norm": 0.21894580125808716, "learning_rate": 4.796025943300941e-05, "loss": 0.6774592399597168, "step": 2679 }, { "epoch": 3.2883435582822087, "grad_norm": 0.23964467644691467, "learning_rate": 4.795827324455888e-05, "loss": 0.8645141124725342, "step": 2680 }, { "epoch": 3.2895705521472394, "grad_norm": 0.22469156980514526, "learning_rate": 4.795628613072584e-05, "loss": 0.6357400417327881, "step": 2681 }, { "epoch": 3.29079754601227, "grad_norm": 0.19287896156311035, "learning_rate": 4.7954298091590374e-05, "loss": 0.8769205808639526, "step": 2682 }, { "epoch": 3.2920245398773007, "grad_norm": 0.1864778995513916, "learning_rate": 4.795230912723263e-05, "loss": 0.8068448305130005, "step": 2683 }, { "epoch": 3.2932515337423314, "grad_norm": 0.20234838128089905, "learning_rate": 4.7950319237732766e-05, "loss": 0.7468483448028564, "step": 2684 }, { "epoch": 3.294478527607362, "grad_norm": 0.2587863802909851, "learning_rate": 4.794832842317098e-05, "loss": 0.6968777775764465, "step": 2685 }, { "epoch": 3.2957055214723927, "grad_norm": 0.21739165484905243, "learning_rate": 4.7946336683627536e-05, "loss": 0.8233777284622192, "step": 2686 }, { "epoch": 3.2969325153374234, "grad_norm": 0.20896320044994354, "learning_rate": 4.7944344019182696e-05, "loss": 0.8876587748527527, "step": 2687 }, { "epoch": 3.298159509202454, "grad_norm": 0.19991092383861542, "learning_rate": 4.794235042991679e-05, "loss": 0.8322557806968689, "step": 2688 }, { "epoch": 3.2993865030674847, "grad_norm": 0.23538213968276978, "learning_rate": 4.794035591591017e-05, "loss": 0.797191858291626, "step": 2689 }, { "epoch": 3.3006134969325154, "grad_norm": 0.24656932055950165, "learning_rate": 4.793836047724324e-05, "loss": 0.6690322160720825, "step": 2690 }, { "epoch": 3.301840490797546, "grad_norm": 0.20567606389522552, "learning_rate": 4.793636411399641e-05, "loss": 0.7492669820785522, "step": 2691 }, { "epoch": 3.3030674846625767, "grad_norm": 0.2169174700975418, "learning_rate": 4.7934366826250174e-05, "loss": 0.9962924718856812, "step": 2692 }, { "epoch": 3.3042944785276074, "grad_norm": 0.22613489627838135, "learning_rate": 4.793236861408501e-05, "loss": 0.797978401184082, "step": 2693 }, { "epoch": 3.305521472392638, "grad_norm": 0.2449560910463333, "learning_rate": 4.7930369477581475e-05, "loss": 0.8887914419174194, "step": 2694 }, { "epoch": 3.3067484662576687, "grad_norm": 0.1906341314315796, "learning_rate": 4.792836941682015e-05, "loss": 0.8274688720703125, "step": 2695 }, { "epoch": 3.3079754601226994, "grad_norm": 0.2105097621679306, "learning_rate": 4.7926368431881644e-05, "loss": 0.8906071186065674, "step": 2696 }, { "epoch": 3.30920245398773, "grad_norm": 0.256015807390213, "learning_rate": 4.792436652284661e-05, "loss": 0.9019607901573181, "step": 2697 }, { "epoch": 3.3104294478527607, "grad_norm": 0.20877206325531006, "learning_rate": 4.792236368979575e-05, "loss": 0.9930169582366943, "step": 2698 }, { "epoch": 3.3116564417177914, "grad_norm": 0.18386812508106232, "learning_rate": 4.792035993280979e-05, "loss": 0.8385190367698669, "step": 2699 }, { "epoch": 3.312883435582822, "grad_norm": 0.21755388379096985, "learning_rate": 4.791835525196948e-05, "loss": 0.7796396613121033, "step": 2700 }, { "epoch": 3.3141104294478527, "grad_norm": 0.24297715723514557, "learning_rate": 4.791634964735564e-05, "loss": 0.8519504070281982, "step": 2701 }, { "epoch": 3.3153374233128834, "grad_norm": 0.2319105863571167, "learning_rate": 4.791434311904911e-05, "loss": 0.7640889883041382, "step": 2702 }, { "epoch": 3.316564417177914, "grad_norm": 0.29149091243743896, "learning_rate": 4.791233566713076e-05, "loss": 0.7215287089347839, "step": 2703 }, { "epoch": 3.3177914110429447, "grad_norm": 0.23436392843723297, "learning_rate": 4.791032729168151e-05, "loss": 0.824523389339447, "step": 2704 }, { "epoch": 3.3190184049079754, "grad_norm": 0.23361603915691376, "learning_rate": 4.79083179927823e-05, "loss": 0.8604996800422668, "step": 2705 }, { "epoch": 3.320245398773006, "grad_norm": 0.2257014811038971, "learning_rate": 4.790630777051414e-05, "loss": 0.821938157081604, "step": 2706 }, { "epoch": 3.3214723926380367, "grad_norm": 0.1829659640789032, "learning_rate": 4.7904296624958036e-05, "loss": 0.8515422344207764, "step": 2707 }, { "epoch": 3.3226993865030674, "grad_norm": 0.2137095183134079, "learning_rate": 4.7902284556195064e-05, "loss": 0.8157206773757935, "step": 2708 }, { "epoch": 3.323926380368098, "grad_norm": 0.2406863570213318, "learning_rate": 4.790027156430632e-05, "loss": 0.7519418001174927, "step": 2709 }, { "epoch": 3.3251533742331287, "grad_norm": 0.18625982105731964, "learning_rate": 4.789825764937294e-05, "loss": 1.0334992408752441, "step": 2710 }, { "epoch": 3.3263803680981594, "grad_norm": 0.24286672472953796, "learning_rate": 4.78962428114761e-05, "loss": 0.747796356678009, "step": 2711 }, { "epoch": 3.32760736196319, "grad_norm": 0.17713706195354462, "learning_rate": 4.789422705069703e-05, "loss": 0.9115180969238281, "step": 2712 }, { "epoch": 3.3288343558282207, "grad_norm": 0.2446252405643463, "learning_rate": 4.789221036711695e-05, "loss": 0.7986074090003967, "step": 2713 }, { "epoch": 3.3300613496932514, "grad_norm": 0.21622073650360107, "learning_rate": 4.7890192760817166e-05, "loss": 0.7095431089401245, "step": 2714 }, { "epoch": 3.331288343558282, "grad_norm": 0.19683437049388885, "learning_rate": 4.788817423187899e-05, "loss": 0.8680762648582458, "step": 2715 }, { "epoch": 3.3325153374233127, "grad_norm": 0.2064422369003296, "learning_rate": 4.78861547803838e-05, "loss": 0.9816429615020752, "step": 2716 }, { "epoch": 3.3337423312883434, "grad_norm": 0.23331882059574127, "learning_rate": 4.788413440641297e-05, "loss": 0.8097834587097168, "step": 2717 }, { "epoch": 3.334969325153374, "grad_norm": 0.262165367603302, "learning_rate": 4.7882113110047966e-05, "loss": 0.5380558371543884, "step": 2718 }, { "epoch": 3.3361963190184047, "grad_norm": 0.21147167682647705, "learning_rate": 4.788009089137023e-05, "loss": 0.8471550941467285, "step": 2719 }, { "epoch": 3.3374233128834354, "grad_norm": 0.2175445854663849, "learning_rate": 4.78780677504613e-05, "loss": 0.7139975428581238, "step": 2720 }, { "epoch": 3.338650306748466, "grad_norm": 0.24162988364696503, "learning_rate": 4.7876043687402695e-05, "loss": 0.5818687081336975, "step": 2721 }, { "epoch": 3.3398773006134967, "grad_norm": 0.21002697944641113, "learning_rate": 4.7874018702276025e-05, "loss": 0.8169912099838257, "step": 2722 }, { "epoch": 3.3411042944785274, "grad_norm": 0.21462374925613403, "learning_rate": 4.787199279516289e-05, "loss": 0.6855207681655884, "step": 2723 }, { "epoch": 3.3423312883435585, "grad_norm": 0.23838786780834198, "learning_rate": 4.786996596614497e-05, "loss": 0.6927778720855713, "step": 2724 }, { "epoch": 3.3435582822085887, "grad_norm": 0.2806463837623596, "learning_rate": 4.7867938215303944e-05, "loss": 0.7255816459655762, "step": 2725 }, { "epoch": 3.34478527607362, "grad_norm": 0.21748317778110504, "learning_rate": 4.7865909542721545e-05, "loss": 0.8241163492202759, "step": 2726 }, { "epoch": 3.34601226993865, "grad_norm": 0.22983701527118683, "learning_rate": 4.786387994847955e-05, "loss": 0.7883352041244507, "step": 2727 }, { "epoch": 3.347239263803681, "grad_norm": 0.21791601181030273, "learning_rate": 4.7861849432659764e-05, "loss": 0.8251891136169434, "step": 2728 }, { "epoch": 3.3484662576687114, "grad_norm": 0.1727931946516037, "learning_rate": 4.785981799534404e-05, "loss": 0.9482555389404297, "step": 2729 }, { "epoch": 3.3496932515337425, "grad_norm": 0.26763916015625, "learning_rate": 4.7857785636614236e-05, "loss": 0.6605220437049866, "step": 2730 }, { "epoch": 3.3509202453987728, "grad_norm": 0.21515949070453644, "learning_rate": 4.78557523565523e-05, "loss": 0.7904795408248901, "step": 2731 }, { "epoch": 3.352147239263804, "grad_norm": 0.2692035734653473, "learning_rate": 4.785371815524017e-05, "loss": 0.7060977220535278, "step": 2732 }, { "epoch": 3.3533742331288345, "grad_norm": 0.2040216326713562, "learning_rate": 4.7851683032759834e-05, "loss": 0.803905189037323, "step": 2733 }, { "epoch": 3.354601226993865, "grad_norm": 0.19754770398139954, "learning_rate": 4.7849646989193335e-05, "loss": 0.8658711910247803, "step": 2734 }, { "epoch": 3.355828220858896, "grad_norm": 0.17055752873420715, "learning_rate": 4.784761002462273e-05, "loss": 0.8974363803863525, "step": 2735 }, { "epoch": 3.3570552147239265, "grad_norm": 0.18981362879276276, "learning_rate": 4.7845572139130134e-05, "loss": 0.860994815826416, "step": 2736 }, { "epoch": 3.358282208588957, "grad_norm": 0.23058785498142242, "learning_rate": 4.784353333279769e-05, "loss": 0.7192444801330566, "step": 2737 }, { "epoch": 3.359509202453988, "grad_norm": 0.2300078272819519, "learning_rate": 4.7841493605707555e-05, "loss": 0.7442779541015625, "step": 2738 }, { "epoch": 3.3607361963190185, "grad_norm": 0.24791991710662842, "learning_rate": 4.783945295794197e-05, "loss": 0.8014806509017944, "step": 2739 }, { "epoch": 3.361963190184049, "grad_norm": 0.24854907393455505, "learning_rate": 4.783741138958316e-05, "loss": 0.6785538196563721, "step": 2740 }, { "epoch": 3.36319018404908, "grad_norm": 0.2041131854057312, "learning_rate": 4.783536890071345e-05, "loss": 0.7353248000144958, "step": 2741 }, { "epoch": 3.3644171779141105, "grad_norm": 0.20378313958644867, "learning_rate": 4.783332549141514e-05, "loss": 0.9140223264694214, "step": 2742 }, { "epoch": 3.365644171779141, "grad_norm": 0.2127612829208374, "learning_rate": 4.78312811617706e-05, "loss": 0.808411717414856, "step": 2743 }, { "epoch": 3.366871165644172, "grad_norm": 0.21443259716033936, "learning_rate": 4.7829235911862235e-05, "loss": 0.7279045581817627, "step": 2744 }, { "epoch": 3.3680981595092025, "grad_norm": 0.1945236176252365, "learning_rate": 4.7827189741772486e-05, "loss": 0.7844163775444031, "step": 2745 }, { "epoch": 3.369325153374233, "grad_norm": 0.18306592106819153, "learning_rate": 4.782514265158382e-05, "loss": 0.9228062629699707, "step": 2746 }, { "epoch": 3.370552147239264, "grad_norm": 0.3145502209663391, "learning_rate": 4.782309464137875e-05, "loss": 0.7139025926589966, "step": 2747 }, { "epoch": 3.3717791411042946, "grad_norm": 0.1754879653453827, "learning_rate": 4.782104571123983e-05, "loss": 1.1148641109466553, "step": 2748 }, { "epoch": 3.373006134969325, "grad_norm": 0.22210781276226044, "learning_rate": 4.781899586124965e-05, "loss": 0.7437640428543091, "step": 2749 }, { "epoch": 3.374233128834356, "grad_norm": 0.2835889160633087, "learning_rate": 4.781694509149083e-05, "loss": 0.706926703453064, "step": 2750 }, { "epoch": 3.3754601226993866, "grad_norm": 0.21101371943950653, "learning_rate": 4.781489340204602e-05, "loss": 0.8321959972381592, "step": 2751 }, { "epoch": 3.3766871165644172, "grad_norm": 0.22107265889644623, "learning_rate": 4.781284079299793e-05, "loss": 0.7250728607177734, "step": 2752 }, { "epoch": 3.377914110429448, "grad_norm": 0.2092137187719345, "learning_rate": 4.78107872644293e-05, "loss": 0.7746155261993408, "step": 2753 }, { "epoch": 3.3791411042944786, "grad_norm": 0.23185181617736816, "learning_rate": 4.780873281642289e-05, "loss": 0.855732262134552, "step": 2754 }, { "epoch": 3.3803680981595092, "grad_norm": 0.20277930796146393, "learning_rate": 4.7806677449061505e-05, "loss": 0.8194520473480225, "step": 2755 }, { "epoch": 3.38159509202454, "grad_norm": 0.1799309402704239, "learning_rate": 4.7804621162428004e-05, "loss": 0.7994312047958374, "step": 2756 }, { "epoch": 3.3828220858895706, "grad_norm": 0.22038885951042175, "learning_rate": 4.780256395660526e-05, "loss": 0.6853736639022827, "step": 2757 }, { "epoch": 3.3840490797546012, "grad_norm": 0.18659783899784088, "learning_rate": 4.7800505831676204e-05, "loss": 0.870958685874939, "step": 2758 }, { "epoch": 3.385276073619632, "grad_norm": 0.21046510338783264, "learning_rate": 4.779844678772378e-05, "loss": 0.9475998878479004, "step": 2759 }, { "epoch": 3.3865030674846626, "grad_norm": 0.2332015037536621, "learning_rate": 4.7796386824831005e-05, "loss": 0.920336902141571, "step": 2760 }, { "epoch": 3.3877300613496932, "grad_norm": 0.2315930277109146, "learning_rate": 4.779432594308088e-05, "loss": 0.7381739020347595, "step": 2761 }, { "epoch": 3.388957055214724, "grad_norm": 0.24988706409931183, "learning_rate": 4.7792264142556493e-05, "loss": 0.7271934747695923, "step": 2762 }, { "epoch": 3.3901840490797546, "grad_norm": 0.19679811596870422, "learning_rate": 4.779020142334094e-05, "loss": 0.8973644375801086, "step": 2763 }, { "epoch": 3.3914110429447852, "grad_norm": 0.2044113576412201, "learning_rate": 4.7788137785517375e-05, "loss": 0.9215115308761597, "step": 2764 }, { "epoch": 3.392638036809816, "grad_norm": 0.1879739761352539, "learning_rate": 4.778607322916896e-05, "loss": 0.950238049030304, "step": 2765 }, { "epoch": 3.3938650306748466, "grad_norm": 0.1921992152929306, "learning_rate": 4.778400775437892e-05, "loss": 0.8321022987365723, "step": 2766 }, { "epoch": 3.3950920245398772, "grad_norm": 0.2331366091966629, "learning_rate": 4.778194136123052e-05, "loss": 0.6898933053016663, "step": 2767 }, { "epoch": 3.396319018404908, "grad_norm": 0.19373776018619537, "learning_rate": 4.777987404980704e-05, "loss": 0.9246914386749268, "step": 2768 }, { "epoch": 3.3975460122699386, "grad_norm": 0.23095065355300903, "learning_rate": 4.7777805820191804e-05, "loss": 0.7326346635818481, "step": 2769 }, { "epoch": 3.3987730061349692, "grad_norm": 0.208687424659729, "learning_rate": 4.7775736672468174e-05, "loss": 0.8026928901672363, "step": 2770 }, { "epoch": 3.4, "grad_norm": 0.23029612004756927, "learning_rate": 4.7773666606719556e-05, "loss": 0.8360018134117126, "step": 2771 }, { "epoch": 3.4012269938650306, "grad_norm": 0.2243185043334961, "learning_rate": 4.7771595623029394e-05, "loss": 0.608132004737854, "step": 2772 }, { "epoch": 3.4024539877300612, "grad_norm": 0.2603650987148285, "learning_rate": 4.7769523721481164e-05, "loss": 0.811498761177063, "step": 2773 }, { "epoch": 3.403680981595092, "grad_norm": 0.22959916293621063, "learning_rate": 4.776745090215837e-05, "loss": 0.8414500951766968, "step": 2774 }, { "epoch": 3.4049079754601226, "grad_norm": 0.19082011282444, "learning_rate": 4.776537716514456e-05, "loss": 0.8986672163009644, "step": 2775 }, { "epoch": 3.4061349693251532, "grad_norm": 0.19538478553295135, "learning_rate": 4.7763302510523335e-05, "loss": 0.7723255157470703, "step": 2776 }, { "epoch": 3.407361963190184, "grad_norm": 0.22472164034843445, "learning_rate": 4.77612269383783e-05, "loss": 0.7893203496932983, "step": 2777 }, { "epoch": 3.4085889570552146, "grad_norm": 0.22577083110809326, "learning_rate": 4.7759150448793134e-05, "loss": 0.8830984234809875, "step": 2778 }, { "epoch": 3.4098159509202453, "grad_norm": 0.21255144476890564, "learning_rate": 4.7757073041851515e-05, "loss": 0.7868099212646484, "step": 2779 }, { "epoch": 3.411042944785276, "grad_norm": 0.2341146469116211, "learning_rate": 4.775499471763719e-05, "loss": 0.8637197017669678, "step": 2780 }, { "epoch": 3.4122699386503066, "grad_norm": 0.23167413473129272, "learning_rate": 4.775291547623393e-05, "loss": 0.9118650555610657, "step": 2781 }, { "epoch": 3.4134969325153373, "grad_norm": 0.24041013419628143, "learning_rate": 4.7750835317725534e-05, "loss": 0.7407669425010681, "step": 2782 }, { "epoch": 3.414723926380368, "grad_norm": 0.20835506916046143, "learning_rate": 4.774875424219586e-05, "loss": 0.8902024626731873, "step": 2783 }, { "epoch": 3.4159509202453986, "grad_norm": 0.20791202783584595, "learning_rate": 4.774667224972878e-05, "loss": 0.8743744492530823, "step": 2784 }, { "epoch": 3.4171779141104293, "grad_norm": 0.15592578053474426, "learning_rate": 4.774458934040822e-05, "loss": 0.8957438468933105, "step": 2785 }, { "epoch": 3.41840490797546, "grad_norm": 0.2844776213169098, "learning_rate": 4.774250551431813e-05, "loss": 0.8134422302246094, "step": 2786 }, { "epoch": 3.419631901840491, "grad_norm": 0.24437986314296722, "learning_rate": 4.774042077154251e-05, "loss": 0.8119805455207825, "step": 2787 }, { "epoch": 3.4208588957055213, "grad_norm": 0.20760564506053925, "learning_rate": 4.7738335112165376e-05, "loss": 0.7975063920021057, "step": 2788 }, { "epoch": 3.4220858895705524, "grad_norm": 0.22454001009464264, "learning_rate": 4.773624853627082e-05, "loss": 0.8858922123908997, "step": 2789 }, { "epoch": 3.4233128834355826, "grad_norm": 0.18673957884311676, "learning_rate": 4.7734161043942925e-05, "loss": 0.8293430805206299, "step": 2790 }, { "epoch": 3.4245398773006137, "grad_norm": 0.21449899673461914, "learning_rate": 4.7732072635265834e-05, "loss": 0.8503991961479187, "step": 2791 }, { "epoch": 3.425766871165644, "grad_norm": 0.21179674565792084, "learning_rate": 4.772998331032373e-05, "loss": 0.7282681465148926, "step": 2792 }, { "epoch": 3.426993865030675, "grad_norm": 0.20611420273780823, "learning_rate": 4.772789306920084e-05, "loss": 0.7931728363037109, "step": 2793 }, { "epoch": 3.4282208588957057, "grad_norm": 0.1976298987865448, "learning_rate": 4.7725801911981386e-05, "loss": 0.8409970998764038, "step": 2794 }, { "epoch": 3.4294478527607364, "grad_norm": 0.21108083426952362, "learning_rate": 4.7723709838749686e-05, "loss": 0.9305580854415894, "step": 2795 }, { "epoch": 3.430674846625767, "grad_norm": 0.18945394456386566, "learning_rate": 4.772161684959004e-05, "loss": 0.8918823003768921, "step": 2796 }, { "epoch": 3.4319018404907977, "grad_norm": 0.3634021580219269, "learning_rate": 4.771952294458683e-05, "loss": 0.8080326914787292, "step": 2797 }, { "epoch": 3.4331288343558284, "grad_norm": 0.3265364468097687, "learning_rate": 4.771742812382445e-05, "loss": 0.5717076659202576, "step": 2798 }, { "epoch": 3.434355828220859, "grad_norm": 0.35246843099594116, "learning_rate": 4.771533238738733e-05, "loss": 0.6113494634628296, "step": 2799 }, { "epoch": 3.4355828220858897, "grad_norm": 0.16996802389621735, "learning_rate": 4.771323573535995e-05, "loss": 0.8199682235717773, "step": 2800 }, { "epoch": 3.4368098159509204, "grad_norm": 0.21138352155685425, "learning_rate": 4.771113816782682e-05, "loss": 0.8680292367935181, "step": 2801 }, { "epoch": 3.438036809815951, "grad_norm": 0.18859897553920746, "learning_rate": 4.7709039684872474e-05, "loss": 0.8716961145401001, "step": 2802 }, { "epoch": 3.4392638036809817, "grad_norm": 0.20016133785247803, "learning_rate": 4.7706940286581514e-05, "loss": 0.6865622997283936, "step": 2803 }, { "epoch": 3.4404907975460124, "grad_norm": 0.26079463958740234, "learning_rate": 4.7704839973038556e-05, "loss": 0.7635281085968018, "step": 2804 }, { "epoch": 3.441717791411043, "grad_norm": 0.18618996441364288, "learning_rate": 4.7702738744328255e-05, "loss": 0.7295199632644653, "step": 2805 }, { "epoch": 3.4429447852760737, "grad_norm": 0.24514830112457275, "learning_rate": 4.7700636600535294e-05, "loss": 0.6793531775474548, "step": 2806 }, { "epoch": 3.4441717791411044, "grad_norm": 0.17471998929977417, "learning_rate": 4.7698533541744425e-05, "loss": 0.7582374811172485, "step": 2807 }, { "epoch": 3.445398773006135, "grad_norm": 0.18740558624267578, "learning_rate": 4.769642956804041e-05, "loss": 0.819505512714386, "step": 2808 }, { "epoch": 3.4466257668711657, "grad_norm": 0.18579182028770447, "learning_rate": 4.769432467950805e-05, "loss": 0.7873474359512329, "step": 2809 }, { "epoch": 3.4478527607361964, "grad_norm": 0.1965256929397583, "learning_rate": 4.7692218876232185e-05, "loss": 0.9465045928955078, "step": 2810 }, { "epoch": 3.449079754601227, "grad_norm": 0.2252974659204483, "learning_rate": 4.7690112158297704e-05, "loss": 0.8543291091918945, "step": 2811 }, { "epoch": 3.4503067484662577, "grad_norm": 0.24270795285701752, "learning_rate": 4.768800452578951e-05, "loss": 0.8398987054824829, "step": 2812 }, { "epoch": 3.4515337423312884, "grad_norm": 0.2303459346294403, "learning_rate": 4.768589597879256e-05, "loss": 0.9094648957252502, "step": 2813 }, { "epoch": 3.452760736196319, "grad_norm": 0.19348062574863434, "learning_rate": 4.768378651739185e-05, "loss": 0.941531777381897, "step": 2814 }, { "epoch": 3.4539877300613497, "grad_norm": 0.23798011243343353, "learning_rate": 4.7681676141672395e-05, "loss": 0.7661722898483276, "step": 2815 }, { "epoch": 3.4552147239263804, "grad_norm": 0.2066165953874588, "learning_rate": 4.7679564851719274e-05, "loss": 0.9967756271362305, "step": 2816 }, { "epoch": 3.456441717791411, "grad_norm": 0.18440771102905273, "learning_rate": 4.7677452647617574e-05, "loss": 0.7952733635902405, "step": 2817 }, { "epoch": 3.4576687116564417, "grad_norm": 0.2519596517086029, "learning_rate": 4.767533952945243e-05, "loss": 0.6132100820541382, "step": 2818 }, { "epoch": 3.4588957055214724, "grad_norm": 0.22291041910648346, "learning_rate": 4.7673225497309026e-05, "loss": 0.91411292552948, "step": 2819 }, { "epoch": 3.460122699386503, "grad_norm": 0.24755598604679108, "learning_rate": 4.767111055127257e-05, "loss": 0.8127889633178711, "step": 2820 }, { "epoch": 3.4613496932515337, "grad_norm": 0.18693777918815613, "learning_rate": 4.766899469142831e-05, "loss": 0.8623508214950562, "step": 2821 }, { "epoch": 3.4625766871165644, "grad_norm": 0.21547813713550568, "learning_rate": 4.7666877917861524e-05, "loss": 0.8365858793258667, "step": 2822 }, { "epoch": 3.463803680981595, "grad_norm": 0.2520809471607208, "learning_rate": 4.7664760230657536e-05, "loss": 0.6992161273956299, "step": 2823 }, { "epoch": 3.4650306748466257, "grad_norm": 0.24543194472789764, "learning_rate": 4.766264162990171e-05, "loss": 0.8224138021469116, "step": 2824 }, { "epoch": 3.4662576687116564, "grad_norm": 0.16338811814785004, "learning_rate": 4.766052211567943e-05, "loss": 0.8176746964454651, "step": 2825 }, { "epoch": 3.467484662576687, "grad_norm": 0.2134641408920288, "learning_rate": 4.7658401688076136e-05, "loss": 0.843353271484375, "step": 2826 }, { "epoch": 3.4687116564417177, "grad_norm": 0.22955970466136932, "learning_rate": 4.7656280347177296e-05, "loss": 0.8527363538742065, "step": 2827 }, { "epoch": 3.4699386503067484, "grad_norm": 0.23228895664215088, "learning_rate": 4.765415809306841e-05, "loss": 0.8410568237304688, "step": 2828 }, { "epoch": 3.471165644171779, "grad_norm": 0.24610498547554016, "learning_rate": 4.765203492583502e-05, "loss": 0.805512547492981, "step": 2829 }, { "epoch": 3.4723926380368098, "grad_norm": 0.22083833813667297, "learning_rate": 4.7649910845562715e-05, "loss": 0.6905598640441895, "step": 2830 }, { "epoch": 3.4736196319018404, "grad_norm": 0.20565102994441986, "learning_rate": 4.76477858523371e-05, "loss": 0.829096794128418, "step": 2831 }, { "epoch": 3.474846625766871, "grad_norm": 0.2509104609489441, "learning_rate": 4.7645659946243834e-05, "loss": 0.76519775390625, "step": 2832 }, { "epoch": 3.4760736196319018, "grad_norm": 0.29097646474838257, "learning_rate": 4.76435331273686e-05, "loss": 0.639985978603363, "step": 2833 }, { "epoch": 3.4773006134969324, "grad_norm": 0.18367242813110352, "learning_rate": 4.7641405395797125e-05, "loss": 0.8036612272262573, "step": 2834 }, { "epoch": 3.478527607361963, "grad_norm": 0.19891279935836792, "learning_rate": 4.763927675161518e-05, "loss": 1.021825909614563, "step": 2835 }, { "epoch": 3.4797546012269938, "grad_norm": 0.22282980382442474, "learning_rate": 4.763714719490855e-05, "loss": 0.767991840839386, "step": 2836 }, { "epoch": 3.4809815950920244, "grad_norm": 0.2354462742805481, "learning_rate": 4.763501672576308e-05, "loss": 0.8564132452011108, "step": 2837 }, { "epoch": 3.482208588957055, "grad_norm": 0.19286787509918213, "learning_rate": 4.763288534426465e-05, "loss": 1.0559303760528564, "step": 2838 }, { "epoch": 3.4834355828220858, "grad_norm": 0.20705099403858185, "learning_rate": 4.763075305049915e-05, "loss": 0.8279393315315247, "step": 2839 }, { "epoch": 3.4846625766871164, "grad_norm": 0.2178925722837448, "learning_rate": 4.7628619844552556e-05, "loss": 0.809686541557312, "step": 2840 }, { "epoch": 3.485889570552147, "grad_norm": 0.20904715359210968, "learning_rate": 4.7626485726510826e-05, "loss": 0.9736857414245605, "step": 2841 }, { "epoch": 3.4871165644171778, "grad_norm": 0.22475562989711761, "learning_rate": 4.7624350696459994e-05, "loss": 0.849773645401001, "step": 2842 }, { "epoch": 3.4883435582822084, "grad_norm": 0.20549167692661285, "learning_rate": 4.7622214754486105e-05, "loss": 0.9280843734741211, "step": 2843 }, { "epoch": 3.489570552147239, "grad_norm": 0.2333540916442871, "learning_rate": 4.7620077900675265e-05, "loss": 0.8502598404884338, "step": 2844 }, { "epoch": 3.4907975460122698, "grad_norm": 0.18048377335071564, "learning_rate": 4.7617940135113606e-05, "loss": 0.86424320936203, "step": 2845 }, { "epoch": 3.4920245398773004, "grad_norm": 0.24866172671318054, "learning_rate": 4.761580145788728e-05, "loss": 0.7077754735946655, "step": 2846 }, { "epoch": 3.493251533742331, "grad_norm": 0.20013269782066345, "learning_rate": 4.761366186908249e-05, "loss": 0.840092658996582, "step": 2847 }, { "epoch": 3.4944785276073618, "grad_norm": 0.20725534856319427, "learning_rate": 4.76115213687855e-05, "loss": 0.763852596282959, "step": 2848 }, { "epoch": 3.4957055214723924, "grad_norm": 0.2454683482646942, "learning_rate": 4.7609379957082566e-05, "loss": 0.8405619859695435, "step": 2849 }, { "epoch": 3.4969325153374236, "grad_norm": 0.22487811744213104, "learning_rate": 4.760723763406002e-05, "loss": 0.791482150554657, "step": 2850 }, { "epoch": 3.4981595092024538, "grad_norm": 0.21863974630832672, "learning_rate": 4.760509439980419e-05, "loss": 0.8463672399520874, "step": 2851 }, { "epoch": 3.499386503067485, "grad_norm": 0.2424326092004776, "learning_rate": 4.760295025440149e-05, "loss": 0.7651511430740356, "step": 2852 }, { "epoch": 3.500613496932515, "grad_norm": 0.21831057965755463, "learning_rate": 4.7600805197938325e-05, "loss": 0.8398932218551636, "step": 2853 }, { "epoch": 3.5018404907975462, "grad_norm": 0.20527450740337372, "learning_rate": 4.759865923050116e-05, "loss": 0.7272389531135559, "step": 2854 }, { "epoch": 3.5030674846625764, "grad_norm": 0.2234433889389038, "learning_rate": 4.759651235217649e-05, "loss": 0.8147491216659546, "step": 2855 }, { "epoch": 3.5042944785276076, "grad_norm": 0.1814185529947281, "learning_rate": 4.759436456305086e-05, "loss": 0.9217543601989746, "step": 2856 }, { "epoch": 3.505521472392638, "grad_norm": 0.20902378857135773, "learning_rate": 4.7592215863210845e-05, "loss": 0.781730055809021, "step": 2857 }, { "epoch": 3.506748466257669, "grad_norm": 0.2070462703704834, "learning_rate": 4.7590066252743034e-05, "loss": 0.7914684414863586, "step": 2858 }, { "epoch": 3.507975460122699, "grad_norm": 0.22626182436943054, "learning_rate": 4.758791573173408e-05, "loss": 0.8721259832382202, "step": 2859 }, { "epoch": 3.5092024539877302, "grad_norm": 0.20874592661857605, "learning_rate": 4.758576430027066e-05, "loss": 0.8163467645645142, "step": 2860 }, { "epoch": 3.510429447852761, "grad_norm": 0.29400748014450073, "learning_rate": 4.758361195843951e-05, "loss": 0.8931046724319458, "step": 2861 }, { "epoch": 3.5116564417177916, "grad_norm": 0.1883208453655243, "learning_rate": 4.758145870632736e-05, "loss": 0.713059663772583, "step": 2862 }, { "epoch": 3.5128834355828222, "grad_norm": 0.2085171788930893, "learning_rate": 4.757930454402103e-05, "loss": 0.9037438631057739, "step": 2863 }, { "epoch": 3.514110429447853, "grad_norm": 0.1788589358329773, "learning_rate": 4.757714947160732e-05, "loss": 0.9103807806968689, "step": 2864 }, { "epoch": 3.5153374233128836, "grad_norm": 0.18704968690872192, "learning_rate": 4.757499348917311e-05, "loss": 0.8118147850036621, "step": 2865 }, { "epoch": 3.5165644171779142, "grad_norm": 0.19693328440189362, "learning_rate": 4.7572836596805305e-05, "loss": 0.9055487513542175, "step": 2866 }, { "epoch": 3.517791411042945, "grad_norm": 0.20130333304405212, "learning_rate": 4.757067879459083e-05, "loss": 1.006637454032898, "step": 2867 }, { "epoch": 3.5190184049079756, "grad_norm": 0.23575164377689362, "learning_rate": 4.7568520082616665e-05, "loss": 0.6461695432662964, "step": 2868 }, { "epoch": 3.5202453987730062, "grad_norm": 0.20713970065116882, "learning_rate": 4.756636046096982e-05, "loss": 0.9314336776733398, "step": 2869 }, { "epoch": 3.521472392638037, "grad_norm": 0.2177211493253708, "learning_rate": 4.756419992973735e-05, "loss": 0.7693988680839539, "step": 2870 }, { "epoch": 3.5226993865030676, "grad_norm": 0.17692767083644867, "learning_rate": 4.756203848900634e-05, "loss": 0.8282492160797119, "step": 2871 }, { "epoch": 3.5239263803680982, "grad_norm": 0.19730210304260254, "learning_rate": 4.75598761388639e-05, "loss": 0.9618101119995117, "step": 2872 }, { "epoch": 3.525153374233129, "grad_norm": 0.19694431126117706, "learning_rate": 4.75577128793972e-05, "loss": 0.8354672193527222, "step": 2873 }, { "epoch": 3.5263803680981596, "grad_norm": 0.290192186832428, "learning_rate": 4.755554871069343e-05, "loss": 0.5524240732192993, "step": 2874 }, { "epoch": 3.5276073619631902, "grad_norm": 0.1601630002260208, "learning_rate": 4.755338363283982e-05, "loss": 0.8075275421142578, "step": 2875 }, { "epoch": 3.528834355828221, "grad_norm": 0.3006714880466461, "learning_rate": 4.755121764592365e-05, "loss": 0.8533709645271301, "step": 2876 }, { "epoch": 3.5300613496932516, "grad_norm": 0.2184450775384903, "learning_rate": 4.75490507500322e-05, "loss": 0.7367390394210815, "step": 2877 }, { "epoch": 3.5312883435582823, "grad_norm": 0.23734050989151, "learning_rate": 4.7546882945252836e-05, "loss": 0.7592169642448425, "step": 2878 }, { "epoch": 3.532515337423313, "grad_norm": 0.23726055026054382, "learning_rate": 4.754471423167292e-05, "loss": 0.6700854301452637, "step": 2879 }, { "epoch": 3.5337423312883436, "grad_norm": 0.19699890911579132, "learning_rate": 4.754254460937988e-05, "loss": 0.8569243550300598, "step": 2880 }, { "epoch": 3.5349693251533743, "grad_norm": 0.2136838287115097, "learning_rate": 4.7540374078461155e-05, "loss": 0.7892166972160339, "step": 2881 }, { "epoch": 3.536196319018405, "grad_norm": 0.19242717325687408, "learning_rate": 4.7538202639004234e-05, "loss": 0.9113836288452148, "step": 2882 }, { "epoch": 3.5374233128834356, "grad_norm": 0.18445709347724915, "learning_rate": 4.753603029109666e-05, "loss": 0.8726551532745361, "step": 2883 }, { "epoch": 3.5386503067484663, "grad_norm": 0.2685413062572479, "learning_rate": 4.753385703482597e-05, "loss": 0.7338628172874451, "step": 2884 }, { "epoch": 3.539877300613497, "grad_norm": 0.19652608036994934, "learning_rate": 4.753168287027977e-05, "loss": 0.9342548847198486, "step": 2885 }, { "epoch": 3.5411042944785276, "grad_norm": 0.20980221033096313, "learning_rate": 4.752950779754569e-05, "loss": 0.8295788168907166, "step": 2886 }, { "epoch": 3.5423312883435583, "grad_norm": 0.21121546626091003, "learning_rate": 4.752733181671142e-05, "loss": 0.792351245880127, "step": 2887 }, { "epoch": 3.543558282208589, "grad_norm": 0.22561584413051605, "learning_rate": 4.752515492786465e-05, "loss": 0.6770650148391724, "step": 2888 }, { "epoch": 3.5447852760736196, "grad_norm": 0.23287220299243927, "learning_rate": 4.752297713109313e-05, "loss": 0.830244779586792, "step": 2889 }, { "epoch": 3.5460122699386503, "grad_norm": 0.3222368061542511, "learning_rate": 4.7520798426484635e-05, "loss": 0.7930164337158203, "step": 2890 }, { "epoch": 3.547239263803681, "grad_norm": 0.17144568264484406, "learning_rate": 4.751861881412698e-05, "loss": 0.9327809810638428, "step": 2891 }, { "epoch": 3.5484662576687116, "grad_norm": 0.2176506370306015, "learning_rate": 4.751643829410804e-05, "loss": 0.7841120958328247, "step": 2892 }, { "epoch": 3.5496932515337423, "grad_norm": 0.19060544669628143, "learning_rate": 4.751425686651568e-05, "loss": 0.8851468563079834, "step": 2893 }, { "epoch": 3.550920245398773, "grad_norm": 0.22288817167282104, "learning_rate": 4.751207453143785e-05, "loss": 0.9467942714691162, "step": 2894 }, { "epoch": 3.5521472392638036, "grad_norm": 0.22406958043575287, "learning_rate": 4.75098912889625e-05, "loss": 0.8004273176193237, "step": 2895 }, { "epoch": 3.5533742331288343, "grad_norm": 0.2190690040588379, "learning_rate": 4.750770713917763e-05, "loss": 0.9718437194824219, "step": 2896 }, { "epoch": 3.554601226993865, "grad_norm": 0.21385329961776733, "learning_rate": 4.750552208217127e-05, "loss": 0.7364851236343384, "step": 2897 }, { "epoch": 3.5558282208588956, "grad_norm": 0.20229949057102203, "learning_rate": 4.7503336118031515e-05, "loss": 0.7061032056808472, "step": 2898 }, { "epoch": 3.5570552147239263, "grad_norm": 0.18854068219661713, "learning_rate": 4.7501149246846454e-05, "loss": 0.9780504703521729, "step": 2899 }, { "epoch": 3.558282208588957, "grad_norm": 0.2597237825393677, "learning_rate": 4.749896146870425e-05, "loss": 0.8139333128929138, "step": 2900 }, { "epoch": 3.5595092024539876, "grad_norm": 0.1829136461019516, "learning_rate": 4.749677278369307e-05, "loss": 0.8249130249023438, "step": 2901 }, { "epoch": 3.5607361963190183, "grad_norm": 0.28028133511543274, "learning_rate": 4.7494583191901146e-05, "loss": 0.7301067113876343, "step": 2902 }, { "epoch": 3.561963190184049, "grad_norm": 0.18466880917549133, "learning_rate": 4.749239269341673e-05, "loss": 0.8745617866516113, "step": 2903 }, { "epoch": 3.5631901840490796, "grad_norm": 0.22179052233695984, "learning_rate": 4.7490201288328115e-05, "loss": 0.7206697463989258, "step": 2904 }, { "epoch": 3.5644171779141103, "grad_norm": 0.19008134305477142, "learning_rate": 4.748800897672364e-05, "loss": 0.9296181797981262, "step": 2905 }, { "epoch": 3.565644171779141, "grad_norm": 0.17429658770561218, "learning_rate": 4.7485815758691646e-05, "loss": 0.9576981067657471, "step": 2906 }, { "epoch": 3.5668711656441716, "grad_norm": 0.22108668088912964, "learning_rate": 4.748362163432055e-05, "loss": 0.8112186193466187, "step": 2907 }, { "epoch": 3.5680981595092023, "grad_norm": 0.23794583976268768, "learning_rate": 4.7481426603698806e-05, "loss": 0.7043781280517578, "step": 2908 }, { "epoch": 3.5693251533742334, "grad_norm": 0.21494410932064056, "learning_rate": 4.747923066691487e-05, "loss": 0.7674131393432617, "step": 2909 }, { "epoch": 3.5705521472392636, "grad_norm": 0.20070384442806244, "learning_rate": 4.747703382405725e-05, "loss": 0.8355628252029419, "step": 2910 }, { "epoch": 3.5717791411042947, "grad_norm": 0.26643553376197815, "learning_rate": 4.74748360752145e-05, "loss": 0.7054712176322937, "step": 2911 }, { "epoch": 3.573006134969325, "grad_norm": 0.23170524835586548, "learning_rate": 4.747263742047522e-05, "loss": 0.8802165985107422, "step": 2912 }, { "epoch": 3.574233128834356, "grad_norm": 0.24664701521396637, "learning_rate": 4.7470437859928014e-05, "loss": 0.7035785913467407, "step": 2913 }, { "epoch": 3.5754601226993863, "grad_norm": 0.18221193552017212, "learning_rate": 4.746823739366154e-05, "loss": 0.906800389289856, "step": 2914 }, { "epoch": 3.5766871165644174, "grad_norm": 0.1986820250749588, "learning_rate": 4.746603602176451e-05, "loss": 0.9433172345161438, "step": 2915 }, { "epoch": 3.5779141104294476, "grad_norm": 0.21589826047420502, "learning_rate": 4.746383374432564e-05, "loss": 0.8359154462814331, "step": 2916 }, { "epoch": 3.5791411042944787, "grad_norm": 0.20176216959953308, "learning_rate": 4.7461630561433694e-05, "loss": 0.735136866569519, "step": 2917 }, { "epoch": 3.580368098159509, "grad_norm": 0.1976436972618103, "learning_rate": 4.7459426473177486e-05, "loss": 0.8950773477554321, "step": 2918 }, { "epoch": 3.58159509202454, "grad_norm": 0.2002682089805603, "learning_rate": 4.745722147964585e-05, "loss": 0.66373610496521, "step": 2919 }, { "epoch": 3.5828220858895703, "grad_norm": 0.22070519626140594, "learning_rate": 4.7455015580927674e-05, "loss": 0.827055811882019, "step": 2920 }, { "epoch": 3.5840490797546014, "grad_norm": 0.1702735424041748, "learning_rate": 4.7452808777111864e-05, "loss": 0.8761751651763916, "step": 2921 }, { "epoch": 3.5852760736196316, "grad_norm": 0.2019413262605667, "learning_rate": 4.745060106828736e-05, "loss": 0.7893767356872559, "step": 2922 }, { "epoch": 3.5865030674846627, "grad_norm": 0.20365361869335175, "learning_rate": 4.7448392454543164e-05, "loss": 0.6795104742050171, "step": 2923 }, { "epoch": 3.5877300613496934, "grad_norm": 0.1961229145526886, "learning_rate": 4.74461829359683e-05, "loss": 0.6985466480255127, "step": 2924 }, { "epoch": 3.588957055214724, "grad_norm": 0.21633374691009521, "learning_rate": 4.744397251265181e-05, "loss": 0.8593599200248718, "step": 2925 }, { "epoch": 3.5901840490797547, "grad_norm": 0.20028209686279297, "learning_rate": 4.74417611846828e-05, "loss": 0.8868627548217773, "step": 2926 }, { "epoch": 3.5914110429447854, "grad_norm": 0.2067929059267044, "learning_rate": 4.7439548952150404e-05, "loss": 0.8857611417770386, "step": 2927 }, { "epoch": 3.592638036809816, "grad_norm": 0.21599756181240082, "learning_rate": 4.743733581514379e-05, "loss": 0.8377295732498169, "step": 2928 }, { "epoch": 3.5938650306748468, "grad_norm": 0.25076547265052795, "learning_rate": 4.743512177375217e-05, "loss": 0.6465381979942322, "step": 2929 }, { "epoch": 3.5950920245398774, "grad_norm": 0.20770904421806335, "learning_rate": 4.7432906828064763e-05, "loss": 1.0281622409820557, "step": 2930 }, { "epoch": 3.596319018404908, "grad_norm": 0.2094404548406601, "learning_rate": 4.743069097817088e-05, "loss": 0.7283060550689697, "step": 2931 }, { "epoch": 3.5975460122699388, "grad_norm": 0.19450712203979492, "learning_rate": 4.74284742241598e-05, "loss": 0.973059892654419, "step": 2932 }, { "epoch": 3.5987730061349694, "grad_norm": 0.2016756534576416, "learning_rate": 4.742625656612091e-05, "loss": 0.7132539749145508, "step": 2933 }, { "epoch": 3.6, "grad_norm": 0.2680968642234802, "learning_rate": 4.7424038004143566e-05, "loss": 0.8060261011123657, "step": 2934 }, { "epoch": 3.6012269938650308, "grad_norm": 0.22879524528980255, "learning_rate": 4.742181853831721e-05, "loss": 0.8975297212600708, "step": 2935 }, { "epoch": 3.6024539877300614, "grad_norm": 0.1646626591682434, "learning_rate": 4.741959816873129e-05, "loss": 0.87462317943573, "step": 2936 }, { "epoch": 3.603680981595092, "grad_norm": 0.19546686112880707, "learning_rate": 4.741737689547533e-05, "loss": 0.968325138092041, "step": 2937 }, { "epoch": 3.6049079754601228, "grad_norm": 0.19877813756465912, "learning_rate": 4.741515471863883e-05, "loss": 0.8119819164276123, "step": 2938 }, { "epoch": 3.6061349693251534, "grad_norm": 0.20547150075435638, "learning_rate": 4.741293163831138e-05, "loss": 0.8134312629699707, "step": 2939 }, { "epoch": 3.607361963190184, "grad_norm": 0.21605859696865082, "learning_rate": 4.7410707654582565e-05, "loss": 0.8054148554801941, "step": 2940 }, { "epoch": 3.6085889570552148, "grad_norm": 0.248563751578331, "learning_rate": 4.740848276754205e-05, "loss": 0.9280422925949097, "step": 2941 }, { "epoch": 3.6098159509202454, "grad_norm": 0.186083123087883, "learning_rate": 4.740625697727951e-05, "loss": 0.7918494939804077, "step": 2942 }, { "epoch": 3.611042944785276, "grad_norm": 0.24058686196804047, "learning_rate": 4.7404030283884655e-05, "loss": 0.8288037776947021, "step": 2943 }, { "epoch": 3.6122699386503068, "grad_norm": 0.1942007839679718, "learning_rate": 4.740180268744724e-05, "loss": 0.7961966395378113, "step": 2944 }, { "epoch": 3.6134969325153374, "grad_norm": 0.2120371162891388, "learning_rate": 4.739957418805704e-05, "loss": 0.9002584218978882, "step": 2945 }, { "epoch": 3.614723926380368, "grad_norm": 0.23773305118083954, "learning_rate": 4.73973447858039e-05, "loss": 0.7032536268234253, "step": 2946 }, { "epoch": 3.6159509202453988, "grad_norm": 0.21928681433200836, "learning_rate": 4.7395114480777664e-05, "loss": 0.8722984790802002, "step": 2947 }, { "epoch": 3.6171779141104294, "grad_norm": 0.20108482241630554, "learning_rate": 4.739288327306824e-05, "loss": 0.7255898714065552, "step": 2948 }, { "epoch": 3.61840490797546, "grad_norm": 0.1884147673845291, "learning_rate": 4.739065116276555e-05, "loss": 0.883159875869751, "step": 2949 }, { "epoch": 3.6196319018404908, "grad_norm": 0.2341294288635254, "learning_rate": 4.7388418149959585e-05, "loss": 0.792850136756897, "step": 2950 }, { "epoch": 3.6208588957055214, "grad_norm": 0.2139526903629303, "learning_rate": 4.7386184234740335e-05, "loss": 0.7970523834228516, "step": 2951 }, { "epoch": 3.622085889570552, "grad_norm": 0.1994767040014267, "learning_rate": 4.738394941719784e-05, "loss": 0.8029937148094177, "step": 2952 }, { "epoch": 3.6233128834355828, "grad_norm": 0.2040632665157318, "learning_rate": 4.7381713697422185e-05, "loss": 0.7718590497970581, "step": 2953 }, { "epoch": 3.6245398773006134, "grad_norm": 0.1938776671886444, "learning_rate": 4.737947707550349e-05, "loss": 0.8043335676193237, "step": 2954 }, { "epoch": 3.625766871165644, "grad_norm": 0.2639082372188568, "learning_rate": 4.73772395515319e-05, "loss": 0.7252963185310364, "step": 2955 }, { "epoch": 3.626993865030675, "grad_norm": 0.20441341400146484, "learning_rate": 4.737500112559761e-05, "loss": 0.7610917091369629, "step": 2956 }, { "epoch": 3.6282208588957054, "grad_norm": 0.23727154731750488, "learning_rate": 4.737276179779083e-05, "loss": 0.7550495266914368, "step": 2957 }, { "epoch": 3.629447852760736, "grad_norm": 0.1916504204273224, "learning_rate": 4.7370521568201845e-05, "loss": 0.7984331250190735, "step": 2958 }, { "epoch": 3.630674846625767, "grad_norm": 0.21017217636108398, "learning_rate": 4.736828043692093e-05, "loss": 0.913048505783081, "step": 2959 }, { "epoch": 3.6319018404907975, "grad_norm": 0.1835520714521408, "learning_rate": 4.736603840403843e-05, "loss": 0.9066942930221558, "step": 2960 }, { "epoch": 3.633128834355828, "grad_norm": 0.2259903848171234, "learning_rate": 4.7363795469644704e-05, "loss": 0.619655430316925, "step": 2961 }, { "epoch": 3.634355828220859, "grad_norm": 0.21206121146678925, "learning_rate": 4.736155163383017e-05, "loss": 0.8471543788909912, "step": 2962 }, { "epoch": 3.6355828220858895, "grad_norm": 0.24532955884933472, "learning_rate": 4.7359306896685275e-05, "loss": 0.6376051902770996, "step": 2963 }, { "epoch": 3.63680981595092, "grad_norm": 0.2095855176448822, "learning_rate": 4.7357061258300476e-05, "loss": 0.7081872224807739, "step": 2964 }, { "epoch": 3.638036809815951, "grad_norm": 0.21621383726596832, "learning_rate": 4.7354814718766315e-05, "loss": 0.8628581762313843, "step": 2965 }, { "epoch": 3.6392638036809815, "grad_norm": 0.17999452352523804, "learning_rate": 4.735256727817332e-05, "loss": 0.8730310797691345, "step": 2966 }, { "epoch": 3.640490797546012, "grad_norm": 0.2202131748199463, "learning_rate": 4.735031893661209e-05, "loss": 0.7992177605628967, "step": 2967 }, { "epoch": 3.641717791411043, "grad_norm": 0.20390549302101135, "learning_rate": 4.734806969417326e-05, "loss": 0.8707658052444458, "step": 2968 }, { "epoch": 3.6429447852760735, "grad_norm": 0.23375150561332703, "learning_rate": 4.7345819550947466e-05, "loss": 0.8634852766990662, "step": 2969 }, { "epoch": 3.644171779141104, "grad_norm": 0.18169225752353668, "learning_rate": 4.734356850702543e-05, "loss": 0.9143979549407959, "step": 2970 }, { "epoch": 3.645398773006135, "grad_norm": 0.21866564452648163, "learning_rate": 4.734131656249787e-05, "loss": 0.8917627334594727, "step": 2971 }, { "epoch": 3.646625766871166, "grad_norm": 0.19132904708385468, "learning_rate": 4.7339063717455556e-05, "loss": 0.9659174680709839, "step": 2972 }, { "epoch": 3.647852760736196, "grad_norm": 0.24478378891944885, "learning_rate": 4.7336809971989296e-05, "loss": 0.6147475242614746, "step": 2973 }, { "epoch": 3.6490797546012272, "grad_norm": 0.28677821159362793, "learning_rate": 4.733455532618993e-05, "loss": 0.7972429990768433, "step": 2974 }, { "epoch": 3.6503067484662575, "grad_norm": 0.19909273087978363, "learning_rate": 4.733229978014835e-05, "loss": 0.8037137985229492, "step": 2975 }, { "epoch": 3.6515337423312886, "grad_norm": 0.2688767910003662, "learning_rate": 4.7330043333955445e-05, "loss": 0.8975334167480469, "step": 2976 }, { "epoch": 3.652760736196319, "grad_norm": 0.21997664868831635, "learning_rate": 4.7327785987702186e-05, "loss": 0.6480636596679688, "step": 2977 }, { "epoch": 3.65398773006135, "grad_norm": 0.16756561398506165, "learning_rate": 4.7325527741479564e-05, "loss": 0.9805964827537537, "step": 2978 }, { "epoch": 3.65521472392638, "grad_norm": 0.19484534859657288, "learning_rate": 4.7323268595378585e-05, "loss": 0.8718127012252808, "step": 2979 }, { "epoch": 3.6564417177914113, "grad_norm": 0.20751911401748657, "learning_rate": 4.732100854949032e-05, "loss": 0.9279433488845825, "step": 2980 }, { "epoch": 3.6576687116564415, "grad_norm": 0.21381846070289612, "learning_rate": 4.7318747603905855e-05, "loss": 0.9277938604354858, "step": 2981 }, { "epoch": 3.6588957055214726, "grad_norm": 0.24581566452980042, "learning_rate": 4.731648575871633e-05, "loss": 0.7071151733398438, "step": 2982 }, { "epoch": 3.660122699386503, "grad_norm": 0.19864314794540405, "learning_rate": 4.731422301401292e-05, "loss": 0.8589296340942383, "step": 2983 }, { "epoch": 3.661349693251534, "grad_norm": 0.17796555161476135, "learning_rate": 4.7311959369886825e-05, "loss": 0.9478262066841125, "step": 2984 }, { "epoch": 3.662576687116564, "grad_norm": 0.22252903878688812, "learning_rate": 4.730969482642927e-05, "loss": 0.7757445573806763, "step": 2985 }, { "epoch": 3.6638036809815953, "grad_norm": 0.2265145629644394, "learning_rate": 4.730742938373155e-05, "loss": 0.8327207565307617, "step": 2986 }, { "epoch": 3.665030674846626, "grad_norm": 0.22059866786003113, "learning_rate": 4.730516304188498e-05, "loss": 0.7460079193115234, "step": 2987 }, { "epoch": 3.6662576687116566, "grad_norm": 0.2411683201789856, "learning_rate": 4.730289580098091e-05, "loss": 0.7902835607528687, "step": 2988 }, { "epoch": 3.6674846625766873, "grad_norm": 0.2275787889957428, "learning_rate": 4.7300627661110706e-05, "loss": 0.797879695892334, "step": 2989 }, { "epoch": 3.668711656441718, "grad_norm": 0.2339300960302353, "learning_rate": 4.729835862236581e-05, "loss": 0.8354973196983337, "step": 2990 }, { "epoch": 3.6699386503067486, "grad_norm": 0.18972839415073395, "learning_rate": 4.7296088684837674e-05, "loss": 0.7904305458068848, "step": 2991 }, { "epoch": 3.6711656441717793, "grad_norm": 0.20584753155708313, "learning_rate": 4.729381784861779e-05, "loss": 0.9488053917884827, "step": 2992 }, { "epoch": 3.67239263803681, "grad_norm": 0.20234869420528412, "learning_rate": 4.72915461137977e-05, "loss": 0.9025130271911621, "step": 2993 }, { "epoch": 3.6736196319018406, "grad_norm": 0.2482556700706482, "learning_rate": 4.7289273480468964e-05, "loss": 0.8929672837257385, "step": 2994 }, { "epoch": 3.6748466257668713, "grad_norm": 0.22544805705547333, "learning_rate": 4.7286999948723176e-05, "loss": 0.8294514417648315, "step": 2995 }, { "epoch": 3.676073619631902, "grad_norm": 0.1832275539636612, "learning_rate": 4.7284725518651995e-05, "loss": 0.8979113101959229, "step": 2996 }, { "epoch": 3.6773006134969326, "grad_norm": 0.19883476197719574, "learning_rate": 4.7282450190347084e-05, "loss": 0.8680517077445984, "step": 2997 }, { "epoch": 3.6785276073619633, "grad_norm": 0.21487338840961456, "learning_rate": 4.728017396390015e-05, "loss": 0.700871467590332, "step": 2998 }, { "epoch": 3.679754601226994, "grad_norm": 0.19451753795146942, "learning_rate": 4.7277896839402955e-05, "loss": 0.9118481874465942, "step": 2999 }, { "epoch": 3.6809815950920246, "grad_norm": 0.2130231112241745, "learning_rate": 4.727561881694727e-05, "loss": 0.9502015113830566, "step": 3000 }, { "epoch": 3.6822085889570553, "grad_norm": 0.2090962827205658, "learning_rate": 4.727333989662493e-05, "loss": 0.9071871042251587, "step": 3001 }, { "epoch": 3.683435582822086, "grad_norm": 0.25182589888572693, "learning_rate": 4.727106007852779e-05, "loss": 0.6749973297119141, "step": 3002 }, { "epoch": 3.6846625766871166, "grad_norm": 0.209199920296669, "learning_rate": 4.726877936274772e-05, "loss": 0.7931289672851562, "step": 3003 }, { "epoch": 3.6858895705521473, "grad_norm": 0.19585926830768585, "learning_rate": 4.726649774937668e-05, "loss": 0.8332434892654419, "step": 3004 }, { "epoch": 3.687116564417178, "grad_norm": 0.1935826689004898, "learning_rate": 4.726421523850662e-05, "loss": 0.7593814730644226, "step": 3005 }, { "epoch": 3.6883435582822086, "grad_norm": 0.23171366751194, "learning_rate": 4.7261931830229535e-05, "loss": 0.8558066487312317, "step": 3006 }, { "epoch": 3.6895705521472393, "grad_norm": 0.25691795349121094, "learning_rate": 4.725964752463748e-05, "loss": 0.7836499810218811, "step": 3007 }, { "epoch": 3.69079754601227, "grad_norm": 0.22686539590358734, "learning_rate": 4.725736232182252e-05, "loss": 0.7175941467285156, "step": 3008 }, { "epoch": 3.6920245398773006, "grad_norm": 0.20600178837776184, "learning_rate": 4.725507622187676e-05, "loss": 0.8958795070648193, "step": 3009 }, { "epoch": 3.6932515337423313, "grad_norm": 0.22461742162704468, "learning_rate": 4.725278922489235e-05, "loss": 0.8525354862213135, "step": 3010 }, { "epoch": 3.694478527607362, "grad_norm": 0.20130641758441925, "learning_rate": 4.7250501330961475e-05, "loss": 0.8852313160896301, "step": 3011 }, { "epoch": 3.6957055214723926, "grad_norm": 0.24042075872421265, "learning_rate": 4.724821254017635e-05, "loss": 0.8378448486328125, "step": 3012 }, { "epoch": 3.6969325153374233, "grad_norm": 0.18374435603618622, "learning_rate": 4.7245922852629234e-05, "loss": 0.8970959782600403, "step": 3013 }, { "epoch": 3.698159509202454, "grad_norm": 0.20161104202270508, "learning_rate": 4.724363226841242e-05, "loss": 0.8335268497467041, "step": 3014 }, { "epoch": 3.6993865030674846, "grad_norm": 0.17687153816223145, "learning_rate": 4.724134078761822e-05, "loss": 0.9546188116073608, "step": 3015 }, { "epoch": 3.7006134969325153, "grad_norm": 0.21988339722156525, "learning_rate": 4.723904841033902e-05, "loss": 0.7274295091629028, "step": 3016 }, { "epoch": 3.701840490797546, "grad_norm": 0.23831655085086823, "learning_rate": 4.723675513666719e-05, "loss": 0.7521103620529175, "step": 3017 }, { "epoch": 3.7030674846625766, "grad_norm": 0.2331615686416626, "learning_rate": 4.7234460966695196e-05, "loss": 0.8434334993362427, "step": 3018 }, { "epoch": 3.7042944785276073, "grad_norm": 0.2142772525548935, "learning_rate": 4.7232165900515484e-05, "loss": 0.7975191473960876, "step": 3019 }, { "epoch": 3.705521472392638, "grad_norm": 0.23437488079071045, "learning_rate": 4.7229869938220586e-05, "loss": 0.8910279273986816, "step": 3020 }, { "epoch": 3.7067484662576686, "grad_norm": 0.18894225358963013, "learning_rate": 4.722757307990302e-05, "loss": 0.8570932745933533, "step": 3021 }, { "epoch": 3.7079754601226993, "grad_norm": 0.26159512996673584, "learning_rate": 4.7225275325655384e-05, "loss": 0.842028021812439, "step": 3022 }, { "epoch": 3.70920245398773, "grad_norm": 0.20357270538806915, "learning_rate": 4.722297667557028e-05, "loss": 0.7389840483665466, "step": 3023 }, { "epoch": 3.7104294478527606, "grad_norm": 0.22128131985664368, "learning_rate": 4.722067712974038e-05, "loss": 0.8619433045387268, "step": 3024 }, { "epoch": 3.7116564417177913, "grad_norm": 0.28996941447257996, "learning_rate": 4.721837668825835e-05, "loss": 0.6629365682601929, "step": 3025 }, { "epoch": 3.712883435582822, "grad_norm": 0.29501545429229736, "learning_rate": 4.7216075351216935e-05, "loss": 0.654949426651001, "step": 3026 }, { "epoch": 3.7141104294478526, "grad_norm": 0.23621304333209991, "learning_rate": 4.7213773118708874e-05, "loss": 0.7858953475952148, "step": 3027 }, { "epoch": 3.7153374233128833, "grad_norm": 0.21413420140743256, "learning_rate": 4.7211469990826984e-05, "loss": 0.6956388354301453, "step": 3028 }, { "epoch": 3.716564417177914, "grad_norm": 0.24637411534786224, "learning_rate": 4.720916596766408e-05, "loss": 0.8074411749839783, "step": 3029 }, { "epoch": 3.7177914110429446, "grad_norm": 0.17800447344779968, "learning_rate": 4.720686104931304e-05, "loss": 0.8117516040802002, "step": 3030 }, { "epoch": 3.7190184049079753, "grad_norm": 0.18430402874946594, "learning_rate": 4.720455523586677e-05, "loss": 0.8375200033187866, "step": 3031 }, { "epoch": 3.720245398773006, "grad_norm": 0.20484080910682678, "learning_rate": 4.720224852741821e-05, "loss": 0.9278824925422668, "step": 3032 }, { "epoch": 3.721472392638037, "grad_norm": 0.21320392191410065, "learning_rate": 4.7199940924060326e-05, "loss": 0.7524669170379639, "step": 3033 }, { "epoch": 3.7226993865030673, "grad_norm": 0.20071913301944733, "learning_rate": 4.719763242588615e-05, "loss": 0.8464797735214233, "step": 3034 }, { "epoch": 3.7239263803680984, "grad_norm": 0.2955157458782196, "learning_rate": 4.7195323032988716e-05, "loss": 0.5565398335456848, "step": 3035 }, { "epoch": 3.7251533742331286, "grad_norm": 0.20721979439258575, "learning_rate": 4.7193012745461106e-05, "loss": 0.7568377256393433, "step": 3036 }, { "epoch": 3.7263803680981598, "grad_norm": 0.23420429229736328, "learning_rate": 4.7190701563396455e-05, "loss": 0.646497905254364, "step": 3037 }, { "epoch": 3.72760736196319, "grad_norm": 0.215607687830925, "learning_rate": 4.718838948688792e-05, "loss": 0.7677655220031738, "step": 3038 }, { "epoch": 3.728834355828221, "grad_norm": 0.23079556226730347, "learning_rate": 4.718607651602868e-05, "loss": 0.8375901579856873, "step": 3039 }, { "epoch": 3.7300613496932513, "grad_norm": 0.18506959080696106, "learning_rate": 4.718376265091198e-05, "loss": 0.9223082065582275, "step": 3040 }, { "epoch": 3.7312883435582824, "grad_norm": 0.1993696391582489, "learning_rate": 4.718144789163107e-05, "loss": 0.8867127895355225, "step": 3041 }, { "epoch": 3.7325153374233127, "grad_norm": 0.21520577371120453, "learning_rate": 4.7179132238279256e-05, "loss": 0.868889331817627, "step": 3042 }, { "epoch": 3.7337423312883438, "grad_norm": 0.24071231484413147, "learning_rate": 4.717681569094988e-05, "loss": 0.8144047856330872, "step": 3043 }, { "epoch": 3.734969325153374, "grad_norm": 0.26398444175720215, "learning_rate": 4.717449824973632e-05, "loss": 0.7532665729522705, "step": 3044 }, { "epoch": 3.736196319018405, "grad_norm": 0.22811520099639893, "learning_rate": 4.7172179914731986e-05, "loss": 0.7397329807281494, "step": 3045 }, { "epoch": 3.7374233128834353, "grad_norm": 0.19471879303455353, "learning_rate": 4.71698606860303e-05, "loss": 0.8708534240722656, "step": 3046 }, { "epoch": 3.7386503067484664, "grad_norm": 0.21080109477043152, "learning_rate": 4.716754056372477e-05, "loss": 0.8427013158798218, "step": 3047 }, { "epoch": 3.7398773006134967, "grad_norm": 0.26899316906929016, "learning_rate": 4.71652195479089e-05, "loss": 0.5993040800094604, "step": 3048 }, { "epoch": 3.7411042944785278, "grad_norm": 0.2093590497970581, "learning_rate": 4.716289763867625e-05, "loss": 0.7737412452697754, "step": 3049 }, { "epoch": 3.7423312883435584, "grad_norm": 0.19659417867660522, "learning_rate": 4.7160574836120404e-05, "loss": 0.8710744976997375, "step": 3050 }, { "epoch": 3.743558282208589, "grad_norm": 0.2322351187467575, "learning_rate": 4.7158251140334994e-05, "loss": 0.750251293182373, "step": 3051 }, { "epoch": 3.7447852760736198, "grad_norm": 0.481901079416275, "learning_rate": 4.715592655141367e-05, "loss": 0.9259017705917358, "step": 3052 }, { "epoch": 3.7460122699386504, "grad_norm": 0.21918357908725739, "learning_rate": 4.715360106945015e-05, "loss": 0.735756516456604, "step": 3053 }, { "epoch": 3.747239263803681, "grad_norm": 0.2080373466014862, "learning_rate": 4.715127469453814e-05, "loss": 0.7651180028915405, "step": 3054 }, { "epoch": 3.7484662576687118, "grad_norm": 0.21579167246818542, "learning_rate": 4.714894742677143e-05, "loss": 1.045265793800354, "step": 3055 }, { "epoch": 3.7496932515337424, "grad_norm": 0.2395690232515335, "learning_rate": 4.714661926624383e-05, "loss": 0.6776958703994751, "step": 3056 }, { "epoch": 3.750920245398773, "grad_norm": 0.1990596503019333, "learning_rate": 4.714429021304916e-05, "loss": 0.824318528175354, "step": 3057 }, { "epoch": 3.752147239263804, "grad_norm": 0.2058699131011963, "learning_rate": 4.7141960267281315e-05, "loss": 0.8683844804763794, "step": 3058 }, { "epoch": 3.7533742331288344, "grad_norm": 0.20519019663333893, "learning_rate": 4.713962942903419e-05, "loss": 0.7816389799118042, "step": 3059 }, { "epoch": 3.754601226993865, "grad_norm": 0.1827850341796875, "learning_rate": 4.7137297698401764e-05, "loss": 0.9379583597183228, "step": 3060 }, { "epoch": 3.755828220858896, "grad_norm": 0.16707788407802582, "learning_rate": 4.7134965075478e-05, "loss": 0.9899010062217712, "step": 3061 }, { "epoch": 3.7570552147239265, "grad_norm": 0.1994178146123886, "learning_rate": 4.7132631560356925e-05, "loss": 0.8871030807495117, "step": 3062 }, { "epoch": 3.758282208588957, "grad_norm": 0.23156942427158356, "learning_rate": 4.713029715313259e-05, "loss": 0.8730577230453491, "step": 3063 }, { "epoch": 3.759509202453988, "grad_norm": 0.24775050580501556, "learning_rate": 4.71279618538991e-05, "loss": 0.8122750520706177, "step": 3064 }, { "epoch": 3.7607361963190185, "grad_norm": 0.22668078541755676, "learning_rate": 4.712562566275057e-05, "loss": 0.7836704254150391, "step": 3065 }, { "epoch": 3.761963190184049, "grad_norm": 0.24291473627090454, "learning_rate": 4.712328857978119e-05, "loss": 0.6995974779129028, "step": 3066 }, { "epoch": 3.76319018404908, "grad_norm": 0.19204235076904297, "learning_rate": 4.7120950605085136e-05, "loss": 0.8947439789772034, "step": 3067 }, { "epoch": 3.7644171779141105, "grad_norm": 0.20148013532161713, "learning_rate": 4.711861173875665e-05, "loss": 0.7346513271331787, "step": 3068 }, { "epoch": 3.765644171779141, "grad_norm": 0.2245096117258072, "learning_rate": 4.711627198089001e-05, "loss": 0.7742327451705933, "step": 3069 }, { "epoch": 3.766871165644172, "grad_norm": 0.2187526375055313, "learning_rate": 4.711393133157953e-05, "loss": 0.8543814420700073, "step": 3070 }, { "epoch": 3.7680981595092025, "grad_norm": 0.1903151124715805, "learning_rate": 4.711158979091954e-05, "loss": 0.9249255061149597, "step": 3071 }, { "epoch": 3.769325153374233, "grad_norm": 0.19310389459133148, "learning_rate": 4.710924735900444e-05, "loss": 0.9272814989089966, "step": 3072 }, { "epoch": 3.770552147239264, "grad_norm": 0.20268088579177856, "learning_rate": 4.7106904035928636e-05, "loss": 0.7602343559265137, "step": 3073 }, { "epoch": 3.7717791411042945, "grad_norm": 0.19119741022586823, "learning_rate": 4.710455982178658e-05, "loss": 0.9115394949913025, "step": 3074 }, { "epoch": 3.773006134969325, "grad_norm": 0.23035861551761627, "learning_rate": 4.710221471667276e-05, "loss": 0.8310747146606445, "step": 3075 }, { "epoch": 3.774233128834356, "grad_norm": 0.20790903270244598, "learning_rate": 4.709986872068169e-05, "loss": 0.7493005990982056, "step": 3076 }, { "epoch": 3.7754601226993865, "grad_norm": 0.18490107357501984, "learning_rate": 4.709752183390795e-05, "loss": 0.9594011902809143, "step": 3077 }, { "epoch": 3.776687116564417, "grad_norm": 0.42650386691093445, "learning_rate": 4.709517405644614e-05, "loss": 0.7333958148956299, "step": 3078 }, { "epoch": 3.777914110429448, "grad_norm": 0.2331901639699936, "learning_rate": 4.709282538839087e-05, "loss": 0.7886043787002563, "step": 3079 }, { "epoch": 3.7791411042944785, "grad_norm": 0.24201878905296326, "learning_rate": 4.709047582983683e-05, "loss": 0.8062212467193604, "step": 3080 }, { "epoch": 3.780368098159509, "grad_norm": 0.18858246505260468, "learning_rate": 4.70881253808787e-05, "loss": 0.9502540230751038, "step": 3081 }, { "epoch": 3.78159509202454, "grad_norm": 0.21329237520694733, "learning_rate": 4.7085774041611244e-05, "loss": 0.8411306142807007, "step": 3082 }, { "epoch": 3.7828220858895705, "grad_norm": 0.20796653628349304, "learning_rate": 4.708342181212923e-05, "loss": 0.8375673294067383, "step": 3083 }, { "epoch": 3.784049079754601, "grad_norm": 0.20875564217567444, "learning_rate": 4.7081068692527454e-05, "loss": 0.8058754205703735, "step": 3084 }, { "epoch": 3.785276073619632, "grad_norm": 0.2438916712999344, "learning_rate": 4.707871468290078e-05, "loss": 0.9806920289993286, "step": 3085 }, { "epoch": 3.7865030674846625, "grad_norm": 0.22066998481750488, "learning_rate": 4.707635978334409e-05, "loss": 0.7933477759361267, "step": 3086 }, { "epoch": 3.787730061349693, "grad_norm": 0.22129708528518677, "learning_rate": 4.70740039939523e-05, "loss": 0.6945122480392456, "step": 3087 }, { "epoch": 3.788957055214724, "grad_norm": 0.2032579630613327, "learning_rate": 4.707164731482037e-05, "loss": 0.9052896499633789, "step": 3088 }, { "epoch": 3.7901840490797545, "grad_norm": 0.15173271298408508, "learning_rate": 4.706928974604328e-05, "loss": 1.0421401262283325, "step": 3089 }, { "epoch": 3.791411042944785, "grad_norm": 0.19571036100387573, "learning_rate": 4.7066931287716065e-05, "loss": 0.8800414800643921, "step": 3090 }, { "epoch": 3.792638036809816, "grad_norm": 0.19897614419460297, "learning_rate": 4.706457193993379e-05, "loss": 0.928818941116333, "step": 3091 }, { "epoch": 3.7938650306748465, "grad_norm": 0.20283645391464233, "learning_rate": 4.7062211702791546e-05, "loss": 0.7511540651321411, "step": 3092 }, { "epoch": 3.795092024539877, "grad_norm": 0.22335316240787506, "learning_rate": 4.705985057638448e-05, "loss": 0.7104299068450928, "step": 3093 }, { "epoch": 3.796319018404908, "grad_norm": 0.3203394412994385, "learning_rate": 4.705748856080774e-05, "loss": 0.5907704830169678, "step": 3094 }, { "epoch": 3.7975460122699385, "grad_norm": 0.2053292840719223, "learning_rate": 4.7055125656156564e-05, "loss": 1.0318560600280762, "step": 3095 }, { "epoch": 3.7987730061349696, "grad_norm": 0.18045279383659363, "learning_rate": 4.705276186252616e-05, "loss": 0.7640049457550049, "step": 3096 }, { "epoch": 3.8, "grad_norm": 0.2024548351764679, "learning_rate": 4.7050397180011826e-05, "loss": 0.8382202386856079, "step": 3097 }, { "epoch": 3.801226993865031, "grad_norm": 0.24681520462036133, "learning_rate": 4.7048031608708876e-05, "loss": 0.8733450174331665, "step": 3098 }, { "epoch": 3.802453987730061, "grad_norm": 0.21568864583969116, "learning_rate": 4.704566514871266e-05, "loss": 0.6772434115409851, "step": 3099 }, { "epoch": 3.8036809815950923, "grad_norm": 0.19970469176769257, "learning_rate": 4.7043297800118546e-05, "loss": 0.7556033134460449, "step": 3100 }, { "epoch": 3.8049079754601225, "grad_norm": 0.17819252610206604, "learning_rate": 4.704092956302197e-05, "loss": 1.0100845098495483, "step": 3101 }, { "epoch": 3.8061349693251536, "grad_norm": 0.19002316892147064, "learning_rate": 4.703856043751839e-05, "loss": 0.9518768787384033, "step": 3102 }, { "epoch": 3.807361963190184, "grad_norm": 0.24920950829982758, "learning_rate": 4.703619042370329e-05, "loss": 0.6927230358123779, "step": 3103 }, { "epoch": 3.808588957055215, "grad_norm": 0.20511408150196075, "learning_rate": 4.7033819521672206e-05, "loss": 0.8981788754463196, "step": 3104 }, { "epoch": 3.809815950920245, "grad_norm": 0.19953221082687378, "learning_rate": 4.70314477315207e-05, "loss": 1.038527488708496, "step": 3105 }, { "epoch": 3.8110429447852763, "grad_norm": 0.19858641922473907, "learning_rate": 4.702907505334437e-05, "loss": 0.7885441780090332, "step": 3106 }, { "epoch": 3.8122699386503065, "grad_norm": 0.18315592408180237, "learning_rate": 4.7026701487238855e-05, "loss": 0.7918140888214111, "step": 3107 }, { "epoch": 3.8134969325153376, "grad_norm": 0.20823350548744202, "learning_rate": 4.702432703329982e-05, "loss": 0.6843761801719666, "step": 3108 }, { "epoch": 3.814723926380368, "grad_norm": 0.26803481578826904, "learning_rate": 4.702195169162299e-05, "loss": 0.7162749767303467, "step": 3109 }, { "epoch": 3.815950920245399, "grad_norm": 0.2559443712234497, "learning_rate": 4.701957546230409e-05, "loss": 0.6739755868911743, "step": 3110 }, { "epoch": 3.817177914110429, "grad_norm": 0.18960946798324585, "learning_rate": 4.7017198345438897e-05, "loss": 0.8620388507843018, "step": 3111 }, { "epoch": 3.8184049079754603, "grad_norm": 0.2397642880678177, "learning_rate": 4.701482034112324e-05, "loss": 0.8845123052597046, "step": 3112 }, { "epoch": 3.819631901840491, "grad_norm": 0.24402351677417755, "learning_rate": 4.701244144945296e-05, "loss": 0.7057490348815918, "step": 3113 }, { "epoch": 3.8208588957055216, "grad_norm": 0.2095574140548706, "learning_rate": 4.701006167052395e-05, "loss": 0.8983161449432373, "step": 3114 }, { "epoch": 3.8220858895705523, "grad_norm": 0.25412240624427795, "learning_rate": 4.700768100443213e-05, "loss": 0.7940540313720703, "step": 3115 }, { "epoch": 3.823312883435583, "grad_norm": 0.20860664546489716, "learning_rate": 4.700529945127345e-05, "loss": 0.750719428062439, "step": 3116 }, { "epoch": 3.8245398773006136, "grad_norm": 0.2549489140510559, "learning_rate": 4.700291701114392e-05, "loss": 0.7802289724349976, "step": 3117 }, { "epoch": 3.8257668711656443, "grad_norm": 0.21730467677116394, "learning_rate": 4.7000533684139555e-05, "loss": 0.8115694522857666, "step": 3118 }, { "epoch": 3.826993865030675, "grad_norm": 0.1820833534002304, "learning_rate": 4.699814947035642e-05, "loss": 0.8343486785888672, "step": 3119 }, { "epoch": 3.8282208588957056, "grad_norm": 0.20523583889007568, "learning_rate": 4.6995764369890624e-05, "loss": 0.8199865221977234, "step": 3120 }, { "epoch": 3.8294478527607363, "grad_norm": 0.21350979804992676, "learning_rate": 4.69933783828383e-05, "loss": 0.7863656282424927, "step": 3121 }, { "epoch": 3.830674846625767, "grad_norm": 0.2231009304523468, "learning_rate": 4.699099150929562e-05, "loss": 0.8076343536376953, "step": 3122 }, { "epoch": 3.8319018404907976, "grad_norm": 0.22577431797981262, "learning_rate": 4.69886037493588e-05, "loss": 0.6713608503341675, "step": 3123 }, { "epoch": 3.8331288343558283, "grad_norm": 0.2505715787410736, "learning_rate": 4.698621510312407e-05, "loss": 0.7234951257705688, "step": 3124 }, { "epoch": 3.834355828220859, "grad_norm": 0.22964142262935638, "learning_rate": 4.698382557068772e-05, "loss": 0.7194191813468933, "step": 3125 }, { "epoch": 3.8355828220858896, "grad_norm": 0.23549611866474152, "learning_rate": 4.698143515214605e-05, "loss": 0.7234092950820923, "step": 3126 }, { "epoch": 3.8368098159509203, "grad_norm": 0.21786773204803467, "learning_rate": 4.6979043847595434e-05, "loss": 0.7836154699325562, "step": 3127 }, { "epoch": 3.838036809815951, "grad_norm": 0.1880868822336197, "learning_rate": 4.697665165713224e-05, "loss": 0.8246264457702637, "step": 3128 }, { "epoch": 3.8392638036809816, "grad_norm": 0.19292493164539337, "learning_rate": 4.6974258580852905e-05, "loss": 0.7832986116409302, "step": 3129 }, { "epoch": 3.8404907975460123, "grad_norm": 0.2506791055202484, "learning_rate": 4.6971864618853864e-05, "loss": 0.7682678699493408, "step": 3130 }, { "epoch": 3.841717791411043, "grad_norm": 0.1995878964662552, "learning_rate": 4.696946977123164e-05, "loss": 1.006950855255127, "step": 3131 }, { "epoch": 3.8429447852760736, "grad_norm": 0.2416539192199707, "learning_rate": 4.6967074038082745e-05, "loss": 0.8084571957588196, "step": 3132 }, { "epoch": 3.8441717791411043, "grad_norm": 0.2533104717731476, "learning_rate": 4.6964677419503745e-05, "loss": 0.8052799701690674, "step": 3133 }, { "epoch": 3.845398773006135, "grad_norm": 0.1999940127134323, "learning_rate": 4.6962279915591244e-05, "loss": 0.8645154237747192, "step": 3134 }, { "epoch": 3.8466257668711656, "grad_norm": 0.1826271265745163, "learning_rate": 4.695988152644187e-05, "loss": 0.8951525688171387, "step": 3135 }, { "epoch": 3.8478527607361963, "grad_norm": 0.21313783526420593, "learning_rate": 4.695748225215232e-05, "loss": 0.6923319697380066, "step": 3136 }, { "epoch": 3.849079754601227, "grad_norm": 0.2581528127193451, "learning_rate": 4.695508209281928e-05, "loss": 0.8175395727157593, "step": 3137 }, { "epoch": 3.8503067484662576, "grad_norm": 0.23270055651664734, "learning_rate": 4.695268104853949e-05, "loss": 0.8308453559875488, "step": 3138 }, { "epoch": 3.8515337423312883, "grad_norm": 0.23739999532699585, "learning_rate": 4.6950279119409744e-05, "loss": 0.7454982995986938, "step": 3139 }, { "epoch": 3.852760736196319, "grad_norm": 0.20339177548885345, "learning_rate": 4.694787630552685e-05, "loss": 0.7188360691070557, "step": 3140 }, { "epoch": 3.8539877300613496, "grad_norm": 0.20312587916851044, "learning_rate": 4.694547260698766e-05, "loss": 0.7285928726196289, "step": 3141 }, { "epoch": 3.8552147239263803, "grad_norm": 0.19278854131698608, "learning_rate": 4.694306802388906e-05, "loss": 0.9140352010726929, "step": 3142 }, { "epoch": 3.856441717791411, "grad_norm": 0.2720522880554199, "learning_rate": 4.694066255632797e-05, "loss": 0.7726398706436157, "step": 3143 }, { "epoch": 3.8576687116564417, "grad_norm": 0.2062380313873291, "learning_rate": 4.693825620440135e-05, "loss": 0.8845230937004089, "step": 3144 }, { "epoch": 3.8588957055214723, "grad_norm": 0.1911204606294632, "learning_rate": 4.693584896820619e-05, "loss": 0.8010851144790649, "step": 3145 }, { "epoch": 3.860122699386503, "grad_norm": 0.21902234852313995, "learning_rate": 4.693344084783953e-05, "loss": 0.8711166381835938, "step": 3146 }, { "epoch": 3.8613496932515337, "grad_norm": 0.2396697998046875, "learning_rate": 4.693103184339842e-05, "loss": 0.7449647188186646, "step": 3147 }, { "epoch": 3.8625766871165643, "grad_norm": 0.23180873692035675, "learning_rate": 4.692862195497996e-05, "loss": 0.8553845882415771, "step": 3148 }, { "epoch": 3.863803680981595, "grad_norm": 0.19299232959747314, "learning_rate": 4.69262111826813e-05, "loss": 0.8312106132507324, "step": 3149 }, { "epoch": 3.8650306748466257, "grad_norm": 0.2053598314523697, "learning_rate": 4.6923799526599594e-05, "loss": 0.8532803058624268, "step": 3150 }, { "epoch": 3.8662576687116563, "grad_norm": 0.19748005270957947, "learning_rate": 4.692138698683206e-05, "loss": 0.8696162104606628, "step": 3151 }, { "epoch": 3.867484662576687, "grad_norm": 0.22602735459804535, "learning_rate": 4.6918973563475944e-05, "loss": 0.8349682092666626, "step": 3152 }, { "epoch": 3.8687116564417177, "grad_norm": 0.2552856206893921, "learning_rate": 4.691655925662851e-05, "loss": 0.6458736658096313, "step": 3153 }, { "epoch": 3.8699386503067483, "grad_norm": 0.20297686755657196, "learning_rate": 4.691414406638709e-05, "loss": 0.8443502187728882, "step": 3154 }, { "epoch": 3.871165644171779, "grad_norm": 0.21627837419509888, "learning_rate": 4.691172799284902e-05, "loss": 0.797524631023407, "step": 3155 }, { "epoch": 3.8723926380368097, "grad_norm": 0.222056046128273, "learning_rate": 4.690931103611168e-05, "loss": 0.7720383405685425, "step": 3156 }, { "epoch": 3.8736196319018403, "grad_norm": 0.2006281167268753, "learning_rate": 4.6906893196272513e-05, "loss": 0.8801804780960083, "step": 3157 }, { "epoch": 3.874846625766871, "grad_norm": 0.25079628825187683, "learning_rate": 4.690447447342896e-05, "loss": 0.6795072555541992, "step": 3158 }, { "epoch": 3.876073619631902, "grad_norm": 0.19455653429031372, "learning_rate": 4.690205486767851e-05, "loss": 0.8079402446746826, "step": 3159 }, { "epoch": 3.8773006134969323, "grad_norm": 0.2212352752685547, "learning_rate": 4.689963437911869e-05, "loss": 0.7383760809898376, "step": 3160 }, { "epoch": 3.8785276073619634, "grad_norm": 0.22560523450374603, "learning_rate": 4.6897213007847074e-05, "loss": 0.768693208694458, "step": 3161 }, { "epoch": 3.8797546012269937, "grad_norm": 0.21285073459148407, "learning_rate": 4.689479075396126e-05, "loss": 0.8925069570541382, "step": 3162 }, { "epoch": 3.880981595092025, "grad_norm": 0.19974631071090698, "learning_rate": 4.689236761755886e-05, "loss": 0.9525362253189087, "step": 3163 }, { "epoch": 3.882208588957055, "grad_norm": 0.18716074526309967, "learning_rate": 4.6889943598737576e-05, "loss": 0.8666438460350037, "step": 3164 }, { "epoch": 3.883435582822086, "grad_norm": 0.22439877688884735, "learning_rate": 4.6887518697595096e-05, "loss": 0.8869301676750183, "step": 3165 }, { "epoch": 3.8846625766871163, "grad_norm": 0.17122644186019897, "learning_rate": 4.6885092914229156e-05, "loss": 0.7299416065216064, "step": 3166 }, { "epoch": 3.8858895705521475, "grad_norm": 0.2673850953578949, "learning_rate": 4.6882666248737545e-05, "loss": 0.7647961378097534, "step": 3167 }, { "epoch": 3.8871165644171777, "grad_norm": 0.21341557800769806, "learning_rate": 4.688023870121806e-05, "loss": 0.877744197845459, "step": 3168 }, { "epoch": 3.888343558282209, "grad_norm": 0.20788167417049408, "learning_rate": 4.687781027176857e-05, "loss": 0.7532806396484375, "step": 3169 }, { "epoch": 3.889570552147239, "grad_norm": 0.17979983985424042, "learning_rate": 4.687538096048694e-05, "loss": 0.7996410131454468, "step": 3170 }, { "epoch": 3.89079754601227, "grad_norm": 0.21578295528888702, "learning_rate": 4.687295076747109e-05, "loss": 0.7787262201309204, "step": 3171 }, { "epoch": 3.8920245398773003, "grad_norm": 0.22503389418125153, "learning_rate": 4.687051969281898e-05, "loss": 0.84987473487854, "step": 3172 }, { "epoch": 3.8932515337423315, "grad_norm": 0.23157398402690887, "learning_rate": 4.6868087736628597e-05, "loss": 0.6179154515266418, "step": 3173 }, { "epoch": 3.894478527607362, "grad_norm": 0.23306170105934143, "learning_rate": 4.6865654898997966e-05, "loss": 0.784021258354187, "step": 3174 }, { "epoch": 3.895705521472393, "grad_norm": 0.18435034155845642, "learning_rate": 4.686322118002515e-05, "loss": 0.8957061171531677, "step": 3175 }, { "epoch": 3.8969325153374235, "grad_norm": 0.294047087430954, "learning_rate": 4.686078657980825e-05, "loss": 0.7585393190383911, "step": 3176 }, { "epoch": 3.898159509202454, "grad_norm": 0.24081043899059296, "learning_rate": 4.6858351098445386e-05, "loss": 0.7688722610473633, "step": 3177 }, { "epoch": 3.899386503067485, "grad_norm": 0.23669077455997467, "learning_rate": 4.685591473603473e-05, "loss": 0.778765857219696, "step": 3178 }, { "epoch": 3.9006134969325155, "grad_norm": 0.29423388838768005, "learning_rate": 4.685347749267449e-05, "loss": 0.6825271248817444, "step": 3179 }, { "epoch": 3.901840490797546, "grad_norm": 0.18500526249408722, "learning_rate": 4.68510393684629e-05, "loss": 0.8211668729782104, "step": 3180 }, { "epoch": 3.903067484662577, "grad_norm": 0.20127686858177185, "learning_rate": 4.684860036349824e-05, "loss": 0.7871841788291931, "step": 3181 }, { "epoch": 3.9042944785276075, "grad_norm": 0.267075777053833, "learning_rate": 4.68461604778788e-05, "loss": 0.7036057114601135, "step": 3182 }, { "epoch": 3.905521472392638, "grad_norm": 0.19933772087097168, "learning_rate": 4.6843719711702946e-05, "loss": 0.9470090866088867, "step": 3183 }, { "epoch": 3.906748466257669, "grad_norm": 0.20297841727733612, "learning_rate": 4.684127806506905e-05, "loss": 0.8889062404632568, "step": 3184 }, { "epoch": 3.9079754601226995, "grad_norm": 0.2242184430360794, "learning_rate": 4.6838835538075523e-05, "loss": 0.9748712182044983, "step": 3185 }, { "epoch": 3.90920245398773, "grad_norm": 0.2090548276901245, "learning_rate": 4.6836392130820826e-05, "loss": 0.7145850658416748, "step": 3186 }, { "epoch": 3.910429447852761, "grad_norm": 0.19585926830768585, "learning_rate": 4.6833947843403446e-05, "loss": 0.7067642211914062, "step": 3187 }, { "epoch": 3.9116564417177915, "grad_norm": 0.2639336585998535, "learning_rate": 4.68315026759219e-05, "loss": 0.6751725077629089, "step": 3188 }, { "epoch": 3.912883435582822, "grad_norm": 0.2669781744480133, "learning_rate": 4.6829056628474745e-05, "loss": 0.8311625719070435, "step": 3189 }, { "epoch": 3.914110429447853, "grad_norm": 0.21477863192558289, "learning_rate": 4.682660970116057e-05, "loss": 0.9414096474647522, "step": 3190 }, { "epoch": 3.9153374233128835, "grad_norm": 0.1678326576948166, "learning_rate": 4.682416189407801e-05, "loss": 0.9432032108306885, "step": 3191 }, { "epoch": 3.916564417177914, "grad_norm": 0.23435671627521515, "learning_rate": 4.6821713207325735e-05, "loss": 0.847783088684082, "step": 3192 }, { "epoch": 3.917791411042945, "grad_norm": 0.21712486445903778, "learning_rate": 4.681926364100244e-05, "loss": 0.8960050344467163, "step": 3193 }, { "epoch": 3.9190184049079755, "grad_norm": 0.22289350628852844, "learning_rate": 4.681681319520685e-05, "loss": 0.8586206436157227, "step": 3194 }, { "epoch": 3.920245398773006, "grad_norm": 0.25435754656791687, "learning_rate": 4.681436187003775e-05, "loss": 0.7467636466026306, "step": 3195 }, { "epoch": 3.921472392638037, "grad_norm": 0.20967236161231995, "learning_rate": 4.681190966559393e-05, "loss": 0.6841154098510742, "step": 3196 }, { "epoch": 3.9226993865030675, "grad_norm": 0.19160987436771393, "learning_rate": 4.6809456581974254e-05, "loss": 0.8581832051277161, "step": 3197 }, { "epoch": 3.923926380368098, "grad_norm": 0.2262953370809555, "learning_rate": 4.680700261927758e-05, "loss": 0.7916795015335083, "step": 3198 }, { "epoch": 3.925153374233129, "grad_norm": 0.2270493358373642, "learning_rate": 4.6804547777602826e-05, "loss": 0.8113992214202881, "step": 3199 }, { "epoch": 3.9263803680981595, "grad_norm": 0.21697047352790833, "learning_rate": 4.680209205704894e-05, "loss": 0.8037466406822205, "step": 3200 }, { "epoch": 3.92760736196319, "grad_norm": 0.20274797081947327, "learning_rate": 4.6799635457714894e-05, "loss": 0.8839085102081299, "step": 3201 }, { "epoch": 3.928834355828221, "grad_norm": 0.25382280349731445, "learning_rate": 4.679717797969973e-05, "loss": 0.711317777633667, "step": 3202 }, { "epoch": 3.9300613496932515, "grad_norm": 0.19732606410980225, "learning_rate": 4.679471962310249e-05, "loss": 0.8643090724945068, "step": 3203 }, { "epoch": 3.931288343558282, "grad_norm": 0.18443314731121063, "learning_rate": 4.6792260388022256e-05, "loss": 0.8020271062850952, "step": 3204 }, { "epoch": 3.932515337423313, "grad_norm": 0.19332750141620636, "learning_rate": 4.678980027455816e-05, "loss": 0.8808014392852783, "step": 3205 }, { "epoch": 3.9337423312883435, "grad_norm": 0.22313925623893738, "learning_rate": 4.6787339282809365e-05, "loss": 0.7053372859954834, "step": 3206 }, { "epoch": 3.934969325153374, "grad_norm": 0.20952783524990082, "learning_rate": 4.678487741287506e-05, "loss": 0.9803808927536011, "step": 3207 }, { "epoch": 3.936196319018405, "grad_norm": 0.20923274755477905, "learning_rate": 4.6782414664854476e-05, "loss": 0.9207307696342468, "step": 3208 }, { "epoch": 3.9374233128834355, "grad_norm": 0.19085796177387238, "learning_rate": 4.677995103884689e-05, "loss": 0.8169717788696289, "step": 3209 }, { "epoch": 3.938650306748466, "grad_norm": 0.24985261261463165, "learning_rate": 4.677748653495159e-05, "loss": 0.5665106177330017, "step": 3210 }, { "epoch": 3.939877300613497, "grad_norm": 0.2278100997209549, "learning_rate": 4.6775021153267924e-05, "loss": 0.7802888751029968, "step": 3211 }, { "epoch": 3.9411042944785275, "grad_norm": 0.22370672225952148, "learning_rate": 4.6772554893895256e-05, "loss": 0.8795849084854126, "step": 3212 }, { "epoch": 3.942331288343558, "grad_norm": 0.19319848716259003, "learning_rate": 4.6770087756932995e-05, "loss": 0.7267938852310181, "step": 3213 }, { "epoch": 3.943558282208589, "grad_norm": 0.19318543374538422, "learning_rate": 4.6767619742480595e-05, "loss": 0.9668416380882263, "step": 3214 }, { "epoch": 3.9447852760736195, "grad_norm": 0.23542799055576324, "learning_rate": 4.676515085063752e-05, "loss": 0.7159901261329651, "step": 3215 }, { "epoch": 3.94601226993865, "grad_norm": 0.19943355023860931, "learning_rate": 4.676268108150329e-05, "loss": 0.9150726795196533, "step": 3216 }, { "epoch": 3.947239263803681, "grad_norm": 0.21725359559059143, "learning_rate": 4.6760210435177455e-05, "loss": 0.7435075044631958, "step": 3217 }, { "epoch": 3.9484662576687115, "grad_norm": 0.2060389518737793, "learning_rate": 4.6757738911759605e-05, "loss": 0.8111745119094849, "step": 3218 }, { "epoch": 3.949693251533742, "grad_norm": 0.2534942924976349, "learning_rate": 4.675526651134935e-05, "loss": 0.7718069553375244, "step": 3219 }, { "epoch": 3.950920245398773, "grad_norm": 0.20742614567279816, "learning_rate": 4.675279323404634e-05, "loss": 0.7948153018951416, "step": 3220 }, { "epoch": 3.9521472392638035, "grad_norm": 0.25500527024269104, "learning_rate": 4.675031907995029e-05, "loss": 0.6484254598617554, "step": 3221 }, { "epoch": 3.9533742331288346, "grad_norm": 0.23686334490776062, "learning_rate": 4.674784404916091e-05, "loss": 0.7499872446060181, "step": 3222 }, { "epoch": 3.954601226993865, "grad_norm": 0.23046137392520905, "learning_rate": 4.674536814177796e-05, "loss": 0.8191676139831543, "step": 3223 }, { "epoch": 3.955828220858896, "grad_norm": 0.2084094136953354, "learning_rate": 4.674289135790124e-05, "loss": 0.7894273400306702, "step": 3224 }, { "epoch": 3.957055214723926, "grad_norm": 0.2060163915157318, "learning_rate": 4.674041369763058e-05, "loss": 0.9698964357376099, "step": 3225 }, { "epoch": 3.9582822085889573, "grad_norm": 0.19099581241607666, "learning_rate": 4.673793516106586e-05, "loss": 0.9127892851829529, "step": 3226 }, { "epoch": 3.9595092024539875, "grad_norm": 0.21928712725639343, "learning_rate": 4.673545574830696e-05, "loss": 0.7967227101325989, "step": 3227 }, { "epoch": 3.9607361963190186, "grad_norm": 0.2186882644891739, "learning_rate": 4.673297545945384e-05, "loss": 0.730832576751709, "step": 3228 }, { "epoch": 3.961963190184049, "grad_norm": 0.21423207223415375, "learning_rate": 4.6730494294606454e-05, "loss": 0.8195887207984924, "step": 3229 }, { "epoch": 3.96319018404908, "grad_norm": 0.22583849728107452, "learning_rate": 4.672801225386483e-05, "loss": 0.8826945424079895, "step": 3230 }, { "epoch": 3.96441717791411, "grad_norm": 0.488756388425827, "learning_rate": 4.6725529337328985e-05, "loss": 0.9087420701980591, "step": 3231 }, { "epoch": 3.9656441717791413, "grad_norm": 0.3124813735485077, "learning_rate": 4.672304554509903e-05, "loss": 0.5601712465286255, "step": 3232 }, { "epoch": 3.9668711656441715, "grad_norm": 0.28205496072769165, "learning_rate": 4.6720560877275064e-05, "loss": 0.8114287853240967, "step": 3233 }, { "epoch": 3.9680981595092026, "grad_norm": 0.19217628240585327, "learning_rate": 4.671807533395724e-05, "loss": 0.9361621141433716, "step": 3234 }, { "epoch": 3.969325153374233, "grad_norm": 0.21277129650115967, "learning_rate": 4.6715588915245735e-05, "loss": 0.8715577125549316, "step": 3235 }, { "epoch": 3.970552147239264, "grad_norm": 0.20257256925106049, "learning_rate": 4.671310162124078e-05, "loss": 0.9694746136665344, "step": 3236 }, { "epoch": 3.9717791411042946, "grad_norm": 0.21045835316181183, "learning_rate": 4.671061345204262e-05, "loss": 0.8580386638641357, "step": 3237 }, { "epoch": 3.9730061349693253, "grad_norm": 0.2530234456062317, "learning_rate": 4.6708124407751564e-05, "loss": 0.7557981014251709, "step": 3238 }, { "epoch": 3.974233128834356, "grad_norm": 0.20519804954528809, "learning_rate": 4.670563448846792e-05, "loss": 0.8322328925132751, "step": 3239 }, { "epoch": 3.9754601226993866, "grad_norm": 0.24414533376693726, "learning_rate": 4.670314369429205e-05, "loss": 0.7906012535095215, "step": 3240 }, { "epoch": 3.9766871165644173, "grad_norm": 0.2544965445995331, "learning_rate": 4.670065202532437e-05, "loss": 0.6711114645004272, "step": 3241 }, { "epoch": 3.977914110429448, "grad_norm": 0.22850702702999115, "learning_rate": 4.6698159481665286e-05, "loss": 0.635490894317627, "step": 3242 }, { "epoch": 3.9791411042944786, "grad_norm": 0.18772704899311066, "learning_rate": 4.669566606341529e-05, "loss": 0.8472541570663452, "step": 3243 }, { "epoch": 3.9803680981595093, "grad_norm": 0.20630383491516113, "learning_rate": 4.669317177067486e-05, "loss": 0.7604770660400391, "step": 3244 }, { "epoch": 3.98159509202454, "grad_norm": 0.2018914669752121, "learning_rate": 4.6690676603544556e-05, "loss": 1.000391960144043, "step": 3245 }, { "epoch": 3.9828220858895707, "grad_norm": 0.2307661473751068, "learning_rate": 4.668818056212494e-05, "loss": 0.723967969417572, "step": 3246 }, { "epoch": 3.9840490797546013, "grad_norm": 0.23044562339782715, "learning_rate": 4.668568364651662e-05, "loss": 0.9456946849822998, "step": 3247 }, { "epoch": 3.985276073619632, "grad_norm": 0.2008390873670578, "learning_rate": 4.668318585682024e-05, "loss": 0.9181065559387207, "step": 3248 }, { "epoch": 3.9865030674846627, "grad_norm": 0.2124657928943634, "learning_rate": 4.668068719313649e-05, "loss": 0.8194364309310913, "step": 3249 }, { "epoch": 3.9877300613496933, "grad_norm": 0.23362115025520325, "learning_rate": 4.667818765556607e-05, "loss": 0.9184059500694275, "step": 3250 }, { "epoch": 3.988957055214724, "grad_norm": 0.18415234982967377, "learning_rate": 4.667568724420973e-05, "loss": 0.8571979403495789, "step": 3251 }, { "epoch": 3.9901840490797547, "grad_norm": 0.23609453439712524, "learning_rate": 4.667318595916826e-05, "loss": 0.8336180448532104, "step": 3252 }, { "epoch": 3.9914110429447853, "grad_norm": 0.20946210622787476, "learning_rate": 4.667068380054248e-05, "loss": 0.6728684902191162, "step": 3253 }, { "epoch": 3.992638036809816, "grad_norm": 0.19214855134487152, "learning_rate": 4.666818076843324e-05, "loss": 0.896338701248169, "step": 3254 }, { "epoch": 3.9938650306748467, "grad_norm": 0.19645367562770844, "learning_rate": 4.666567686294143e-05, "loss": 0.8362781405448914, "step": 3255 }, { "epoch": 3.9950920245398773, "grad_norm": 0.22349946200847626, "learning_rate": 4.666317208416799e-05, "loss": 0.617548942565918, "step": 3256 }, { "epoch": 3.996319018404908, "grad_norm": 0.19140873849391937, "learning_rate": 4.666066643221386e-05, "loss": 0.8749761581420898, "step": 3257 }, { "epoch": 3.9975460122699387, "grad_norm": 0.2476295530796051, "learning_rate": 4.6658159907180046e-05, "loss": 0.6335245370864868, "step": 3258 }, { "epoch": 3.9987730061349693, "grad_norm": 0.21797820925712585, "learning_rate": 4.6655652509167574e-05, "loss": 0.7706530094146729, "step": 3259 }, { "epoch": 4.0, "grad_norm": 0.32699745893478394, "learning_rate": 4.665314423827751e-05, "loss": 0.6823253631591797, "step": 3260 }, { "epoch": 4.001226993865031, "grad_norm": 0.19296640157699585, "learning_rate": 4.665063509461097e-05, "loss": 0.7141658067703247, "step": 3261 }, { "epoch": 4.002453987730061, "grad_norm": 0.21058720350265503, "learning_rate": 4.664812507826907e-05, "loss": 0.7521272897720337, "step": 3262 }, { "epoch": 4.0036809815950924, "grad_norm": 0.21535784006118774, "learning_rate": 4.664561418935299e-05, "loss": 0.7765477895736694, "step": 3263 }, { "epoch": 4.004907975460123, "grad_norm": 0.21580739319324493, "learning_rate": 4.6643102427963945e-05, "loss": 0.7405478954315186, "step": 3264 }, { "epoch": 4.006134969325154, "grad_norm": 0.2062803953886032, "learning_rate": 4.664058979420316e-05, "loss": 0.8585652112960815, "step": 3265 }, { "epoch": 4.007361963190184, "grad_norm": 0.25144922733306885, "learning_rate": 4.663807628817193e-05, "loss": 0.8344151973724365, "step": 3266 }, { "epoch": 4.008588957055215, "grad_norm": 0.2246631234884262, "learning_rate": 4.663556190997156e-05, "loss": 0.8010050058364868, "step": 3267 }, { "epoch": 4.009815950920245, "grad_norm": 0.24964232742786407, "learning_rate": 4.663304665970338e-05, "loss": 0.7490226030349731, "step": 3268 }, { "epoch": 4.0110429447852765, "grad_norm": 0.17614316940307617, "learning_rate": 4.66305305374688e-05, "loss": 0.9639859199523926, "step": 3269 }, { "epoch": 4.012269938650307, "grad_norm": 0.21476353704929352, "learning_rate": 4.6628013543369234e-05, "loss": 0.8428143262863159, "step": 3270 }, { "epoch": 4.013496932515338, "grad_norm": 0.23656857013702393, "learning_rate": 4.6625495677506115e-05, "loss": 0.7446678876876831, "step": 3271 }, { "epoch": 4.014723926380368, "grad_norm": 0.2126644402742386, "learning_rate": 4.662297693998095e-05, "loss": 0.8596969246864319, "step": 3272 }, { "epoch": 4.015950920245399, "grad_norm": 0.18868300318717957, "learning_rate": 4.662045733089525e-05, "loss": 0.928602933883667, "step": 3273 }, { "epoch": 4.017177914110429, "grad_norm": 0.186029851436615, "learning_rate": 4.661793685035058e-05, "loss": 0.948005199432373, "step": 3274 }, { "epoch": 4.0184049079754605, "grad_norm": 0.17912782728672028, "learning_rate": 4.661541549844853e-05, "loss": 0.7528438568115234, "step": 3275 }, { "epoch": 4.019631901840491, "grad_norm": 0.2639031410217285, "learning_rate": 4.6612893275290725e-05, "loss": 0.8746941089630127, "step": 3276 }, { "epoch": 4.020858895705522, "grad_norm": 0.21441636979579926, "learning_rate": 4.661037018097884e-05, "loss": 0.6409955024719238, "step": 3277 }, { "epoch": 4.022085889570552, "grad_norm": 0.2685709595680237, "learning_rate": 4.660784621561458e-05, "loss": 0.6379483938217163, "step": 3278 }, { "epoch": 4.023312883435583, "grad_norm": 0.25805169343948364, "learning_rate": 4.660532137929965e-05, "loss": 0.6064571738243103, "step": 3279 }, { "epoch": 4.024539877300613, "grad_norm": 0.23343788087368011, "learning_rate": 4.660279567213584e-05, "loss": 0.7707176804542542, "step": 3280 }, { "epoch": 4.0257668711656445, "grad_norm": 0.22291943430900574, "learning_rate": 4.660026909422495e-05, "loss": 0.7593660354614258, "step": 3281 }, { "epoch": 4.026993865030675, "grad_norm": 0.22215618193149567, "learning_rate": 4.659774164566882e-05, "loss": 0.8388714790344238, "step": 3282 }, { "epoch": 4.028220858895706, "grad_norm": 0.22910724580287933, "learning_rate": 4.659521332656932e-05, "loss": 0.7610337734222412, "step": 3283 }, { "epoch": 4.029447852760736, "grad_norm": 0.23858186602592468, "learning_rate": 4.6592684137028364e-05, "loss": 0.7218515872955322, "step": 3284 }, { "epoch": 4.030674846625767, "grad_norm": 0.25144508481025696, "learning_rate": 4.65901540771479e-05, "loss": 0.6816110610961914, "step": 3285 }, { "epoch": 4.031901840490797, "grad_norm": 0.228981614112854, "learning_rate": 4.6587623147029894e-05, "loss": 0.8488091230392456, "step": 3286 }, { "epoch": 4.0331288343558285, "grad_norm": 0.21880602836608887, "learning_rate": 4.658509134677638e-05, "loss": 0.9433708190917969, "step": 3287 }, { "epoch": 4.034355828220859, "grad_norm": 0.2286350429058075, "learning_rate": 4.6582558676489384e-05, "loss": 0.9427701234817505, "step": 3288 }, { "epoch": 4.03558282208589, "grad_norm": 0.23699882626533508, "learning_rate": 4.658002513627101e-05, "loss": 0.7454092502593994, "step": 3289 }, { "epoch": 4.03680981595092, "grad_norm": 0.17932040989398956, "learning_rate": 4.657749072622336e-05, "loss": 0.9033938646316528, "step": 3290 }, { "epoch": 4.038036809815951, "grad_norm": 0.22810876369476318, "learning_rate": 4.657495544644861e-05, "loss": 0.8086661100387573, "step": 3291 }, { "epoch": 4.039263803680981, "grad_norm": 0.24374137818813324, "learning_rate": 4.657241929704894e-05, "loss": 0.7217158079147339, "step": 3292 }, { "epoch": 4.0404907975460125, "grad_norm": 0.1978679746389389, "learning_rate": 4.6569882278126575e-05, "loss": 0.8962386846542358, "step": 3293 }, { "epoch": 4.041717791411043, "grad_norm": 0.22974887490272522, "learning_rate": 4.656734438978377e-05, "loss": 0.8466477394104004, "step": 3294 }, { "epoch": 4.042944785276074, "grad_norm": 0.2025294303894043, "learning_rate": 4.6564805632122825e-05, "loss": 0.7872094511985779, "step": 3295 }, { "epoch": 4.044171779141104, "grad_norm": 0.20510900020599365, "learning_rate": 4.6562266005246075e-05, "loss": 0.7994974851608276, "step": 3296 }, { "epoch": 4.045398773006135, "grad_norm": 0.21879743039608002, "learning_rate": 4.6559725509255875e-05, "loss": 0.9038136005401611, "step": 3297 }, { "epoch": 4.046625766871165, "grad_norm": 0.21768827736377716, "learning_rate": 4.655718414425463e-05, "loss": 0.7057574987411499, "step": 3298 }, { "epoch": 4.0478527607361965, "grad_norm": 0.23974645137786865, "learning_rate": 4.655464191034478e-05, "loss": 0.6501688957214355, "step": 3299 }, { "epoch": 4.049079754601227, "grad_norm": 0.2578112483024597, "learning_rate": 4.6552098807628784e-05, "loss": 0.8547738790512085, "step": 3300 }, { "epoch": 4.050306748466258, "grad_norm": 0.38237807154655457, "learning_rate": 4.6549554836209154e-05, "loss": 0.6734221577644348, "step": 3301 }, { "epoch": 4.051533742331288, "grad_norm": 0.19264063239097595, "learning_rate": 4.6547009996188434e-05, "loss": 0.7782533168792725, "step": 3302 }, { "epoch": 4.052760736196319, "grad_norm": 0.25201401114463806, "learning_rate": 4.65444642876692e-05, "loss": 0.7880195379257202, "step": 3303 }, { "epoch": 4.053987730061349, "grad_norm": 0.21294349431991577, "learning_rate": 4.654191771075405e-05, "loss": 0.7866199016571045, "step": 3304 }, { "epoch": 4.0552147239263805, "grad_norm": 0.19276967644691467, "learning_rate": 4.653937026554564e-05, "loss": 0.770285427570343, "step": 3305 }, { "epoch": 4.056441717791411, "grad_norm": 0.26941320300102234, "learning_rate": 4.6536821952146635e-05, "loss": 0.7169286608695984, "step": 3306 }, { "epoch": 4.057668711656442, "grad_norm": 0.25319162011146545, "learning_rate": 4.653427277065977e-05, "loss": 0.760176420211792, "step": 3307 }, { "epoch": 4.058895705521472, "grad_norm": 0.26301613450050354, "learning_rate": 4.6531722721187786e-05, "loss": 0.6666462421417236, "step": 3308 }, { "epoch": 4.060122699386503, "grad_norm": 0.2297217845916748, "learning_rate": 4.6529171803833476e-05, "loss": 0.6801573038101196, "step": 3309 }, { "epoch": 4.061349693251533, "grad_norm": 0.23779743909835815, "learning_rate": 4.652662001869965e-05, "loss": 0.6150611042976379, "step": 3310 }, { "epoch": 4.0625766871165645, "grad_norm": 0.256851464509964, "learning_rate": 4.652406736588917e-05, "loss": 0.7976522445678711, "step": 3311 }, { "epoch": 4.063803680981595, "grad_norm": 0.1971542239189148, "learning_rate": 4.6521513845504914e-05, "loss": 0.7574392557144165, "step": 3312 }, { "epoch": 4.065030674846626, "grad_norm": 0.2480389028787613, "learning_rate": 4.651895945764982e-05, "loss": 0.794293224811554, "step": 3313 }, { "epoch": 4.066257668711656, "grad_norm": 0.20832565426826477, "learning_rate": 4.651640420242685e-05, "loss": 0.966519296169281, "step": 3314 }, { "epoch": 4.067484662576687, "grad_norm": 0.2044353187084198, "learning_rate": 4.6513848079938985e-05, "loss": 0.8084956407546997, "step": 3315 }, { "epoch": 4.068711656441717, "grad_norm": 0.19765028357505798, "learning_rate": 4.651129109028927e-05, "loss": 0.8464943170547485, "step": 3316 }, { "epoch": 4.0699386503067485, "grad_norm": 0.26784101128578186, "learning_rate": 4.650873323358076e-05, "loss": 0.7040928602218628, "step": 3317 }, { "epoch": 4.071165644171779, "grad_norm": 0.2098543792963028, "learning_rate": 4.650617450991655e-05, "loss": 0.8618118762969971, "step": 3318 }, { "epoch": 4.07239263803681, "grad_norm": 0.2926958501338959, "learning_rate": 4.65036149193998e-05, "loss": 0.6436665058135986, "step": 3319 }, { "epoch": 4.07361963190184, "grad_norm": 0.21999523043632507, "learning_rate": 4.6501054462133656e-05, "loss": 0.8335769772529602, "step": 3320 }, { "epoch": 4.074846625766871, "grad_norm": 0.2125878632068634, "learning_rate": 4.649849313822133e-05, "loss": 0.6875518560409546, "step": 3321 }, { "epoch": 4.076073619631902, "grad_norm": 0.20847800374031067, "learning_rate": 4.649593094776606e-05, "loss": 0.7684239149093628, "step": 3322 }, { "epoch": 4.0773006134969325, "grad_norm": 0.24633172154426575, "learning_rate": 4.649336789087112e-05, "loss": 0.8312993049621582, "step": 3323 }, { "epoch": 4.078527607361964, "grad_norm": 0.2618439793586731, "learning_rate": 4.649080396763982e-05, "loss": 0.8069981336593628, "step": 3324 }, { "epoch": 4.079754601226994, "grad_norm": 0.22500352561473846, "learning_rate": 4.648823917817551e-05, "loss": 0.9249694347381592, "step": 3325 }, { "epoch": 4.080981595092025, "grad_norm": 0.22517214715480804, "learning_rate": 4.6485673522581565e-05, "loss": 0.8029170036315918, "step": 3326 }, { "epoch": 4.082208588957055, "grad_norm": 0.22058413922786713, "learning_rate": 4.6483107000961404e-05, "loss": 0.7591440677642822, "step": 3327 }, { "epoch": 4.083435582822086, "grad_norm": 0.19779135286808014, "learning_rate": 4.648053961341846e-05, "loss": 0.876034140586853, "step": 3328 }, { "epoch": 4.0846625766871165, "grad_norm": 0.21513915061950684, "learning_rate": 4.6477971360056235e-05, "loss": 0.9049266576766968, "step": 3329 }, { "epoch": 4.085889570552148, "grad_norm": 0.19384807348251343, "learning_rate": 4.6475402240978235e-05, "loss": 0.89473557472229, "step": 3330 }, { "epoch": 4.087116564417178, "grad_norm": 0.2118690460920334, "learning_rate": 4.647283225628803e-05, "loss": 0.9468141198158264, "step": 3331 }, { "epoch": 4.088343558282209, "grad_norm": 0.25417962670326233, "learning_rate": 4.64702614060892e-05, "loss": 0.8807984590530396, "step": 3332 }, { "epoch": 4.089570552147239, "grad_norm": 0.2622484564781189, "learning_rate": 4.646768969048536e-05, "loss": 0.7939594984054565, "step": 3333 }, { "epoch": 4.09079754601227, "grad_norm": 0.2559159994125366, "learning_rate": 4.646511710958018e-05, "loss": 0.571286141872406, "step": 3334 }, { "epoch": 4.0920245398773005, "grad_norm": 0.254003643989563, "learning_rate": 4.6462543663477345e-05, "loss": 0.770408570766449, "step": 3335 }, { "epoch": 4.093251533742332, "grad_norm": 0.1920326203107834, "learning_rate": 4.64599693522806e-05, "loss": 0.7589080333709717, "step": 3336 }, { "epoch": 4.094478527607362, "grad_norm": 0.21944813430309296, "learning_rate": 4.645739417609368e-05, "loss": 0.928877592086792, "step": 3337 }, { "epoch": 4.095705521472393, "grad_norm": 0.2560424208641052, "learning_rate": 4.645481813502041e-05, "loss": 0.7030628323554993, "step": 3338 }, { "epoch": 4.096932515337423, "grad_norm": 0.196628138422966, "learning_rate": 4.645224122916461e-05, "loss": 0.7708039283752441, "step": 3339 }, { "epoch": 4.098159509202454, "grad_norm": 0.20408272743225098, "learning_rate": 4.644966345863015e-05, "loss": 1.0096526145935059, "step": 3340 }, { "epoch": 4.0993865030674845, "grad_norm": 0.24839478731155396, "learning_rate": 4.6447084823520926e-05, "loss": 0.9124138951301575, "step": 3341 }, { "epoch": 4.100613496932516, "grad_norm": 0.20610707998275757, "learning_rate": 4.644450532394089e-05, "loss": 0.9203832149505615, "step": 3342 }, { "epoch": 4.101840490797546, "grad_norm": 0.20022143423557281, "learning_rate": 4.6441924959994005e-05, "loss": 0.8224292993545532, "step": 3343 }, { "epoch": 4.103067484662577, "grad_norm": 0.24586749076843262, "learning_rate": 4.643934373178428e-05, "loss": 0.7751604318618774, "step": 3344 }, { "epoch": 4.104294478527607, "grad_norm": 0.2258271425962448, "learning_rate": 4.643676163941575e-05, "loss": 0.7968194484710693, "step": 3345 }, { "epoch": 4.105521472392638, "grad_norm": 0.3404659032821655, "learning_rate": 4.6434178682992505e-05, "loss": 0.6126159429550171, "step": 3346 }, { "epoch": 4.1067484662576685, "grad_norm": 0.1944749653339386, "learning_rate": 4.6431594862618646e-05, "loss": 0.8939926624298096, "step": 3347 }, { "epoch": 4.1079754601227, "grad_norm": 0.2190735638141632, "learning_rate": 4.642901017839832e-05, "loss": 0.9786385297775269, "step": 3348 }, { "epoch": 4.10920245398773, "grad_norm": 0.23599204421043396, "learning_rate": 4.642642463043572e-05, "loss": 0.6401257514953613, "step": 3349 }, { "epoch": 4.110429447852761, "grad_norm": 0.21428322792053223, "learning_rate": 4.642383821883504e-05, "loss": 0.709513247013092, "step": 3350 }, { "epoch": 4.111656441717791, "grad_norm": 0.21926479041576385, "learning_rate": 4.6421250943700554e-05, "loss": 0.8221336603164673, "step": 3351 }, { "epoch": 4.112883435582822, "grad_norm": 0.23649172484874725, "learning_rate": 4.6418662805136535e-05, "loss": 0.8381974697113037, "step": 3352 }, { "epoch": 4.1141104294478525, "grad_norm": 0.24207161366939545, "learning_rate": 4.6416073803247304e-05, "loss": 0.6720119714736938, "step": 3353 }, { "epoch": 4.115337423312884, "grad_norm": 0.225946843624115, "learning_rate": 4.641348393813723e-05, "loss": 0.6757930517196655, "step": 3354 }, { "epoch": 4.116564417177914, "grad_norm": 0.21370890736579895, "learning_rate": 4.6410893209910676e-05, "loss": 0.7553167939186096, "step": 3355 }, { "epoch": 4.117791411042945, "grad_norm": 0.22223016619682312, "learning_rate": 4.640830161867209e-05, "loss": 0.8604041934013367, "step": 3356 }, { "epoch": 4.119018404907975, "grad_norm": 0.1928766816854477, "learning_rate": 4.6405709164525924e-05, "loss": 0.7857091426849365, "step": 3357 }, { "epoch": 4.120245398773006, "grad_norm": 0.21667225658893585, "learning_rate": 4.6403115847576665e-05, "loss": 0.7776851654052734, "step": 3358 }, { "epoch": 4.1214723926380366, "grad_norm": 0.24161165952682495, "learning_rate": 4.640052166792886e-05, "loss": 0.7944781184196472, "step": 3359 }, { "epoch": 4.122699386503068, "grad_norm": 0.2880147397518158, "learning_rate": 4.639792662568706e-05, "loss": 0.6466293334960938, "step": 3360 }, { "epoch": 4.123926380368098, "grad_norm": 0.25497663021087646, "learning_rate": 4.639533072095586e-05, "loss": 0.6794538497924805, "step": 3361 }, { "epoch": 4.125153374233129, "grad_norm": 0.24764849245548248, "learning_rate": 4.639273395383991e-05, "loss": 0.7150615453720093, "step": 3362 }, { "epoch": 4.126380368098159, "grad_norm": 0.2057272046804428, "learning_rate": 4.639013632444387e-05, "loss": 0.8580621480941772, "step": 3363 }, { "epoch": 4.12760736196319, "grad_norm": 0.2170163094997406, "learning_rate": 4.638753783287243e-05, "loss": 0.8226796388626099, "step": 3364 }, { "epoch": 4.128834355828221, "grad_norm": 0.20248791575431824, "learning_rate": 4.6384938479230345e-05, "loss": 0.890621542930603, "step": 3365 }, { "epoch": 4.130061349693252, "grad_norm": 0.21793560683727264, "learning_rate": 4.6382338263622385e-05, "loss": 0.7963681221008301, "step": 3366 }, { "epoch": 4.131288343558282, "grad_norm": 0.19097000360488892, "learning_rate": 4.6379737186153346e-05, "loss": 0.776218831539154, "step": 3367 }, { "epoch": 4.132515337423313, "grad_norm": 0.23971591889858246, "learning_rate": 4.6377135246928084e-05, "loss": 0.733920693397522, "step": 3368 }, { "epoch": 4.133742331288343, "grad_norm": 0.20547357201576233, "learning_rate": 4.637453244605147e-05, "loss": 0.7374631762504578, "step": 3369 }, { "epoch": 4.134969325153374, "grad_norm": 0.25135767459869385, "learning_rate": 4.637192878362841e-05, "loss": 0.7776519060134888, "step": 3370 }, { "epoch": 4.136196319018405, "grad_norm": 0.21455331146717072, "learning_rate": 4.636932425976386e-05, "loss": 0.9772138595581055, "step": 3371 }, { "epoch": 4.137423312883436, "grad_norm": 0.24747367203235626, "learning_rate": 4.63667188745628e-05, "loss": 0.8290339112281799, "step": 3372 }, { "epoch": 4.138650306748466, "grad_norm": 0.2828349471092224, "learning_rate": 4.636411262813023e-05, "loss": 0.7511727809906006, "step": 3373 }, { "epoch": 4.139877300613497, "grad_norm": 0.2305436134338379, "learning_rate": 4.6361505520571217e-05, "loss": 0.872114360332489, "step": 3374 }, { "epoch": 4.141104294478527, "grad_norm": 0.1940653920173645, "learning_rate": 4.6358897551990846e-05, "loss": 0.866155743598938, "step": 3375 }, { "epoch": 4.142331288343558, "grad_norm": 0.25620052218437195, "learning_rate": 4.635628872249423e-05, "loss": 0.7513256072998047, "step": 3376 }, { "epoch": 4.143558282208589, "grad_norm": 0.25066229701042175, "learning_rate": 4.6353679032186526e-05, "loss": 0.6550865173339844, "step": 3377 }, { "epoch": 4.14478527607362, "grad_norm": 0.25779175758361816, "learning_rate": 4.635106848117292e-05, "loss": 0.7412557601928711, "step": 3378 }, { "epoch": 4.14601226993865, "grad_norm": 0.1863250732421875, "learning_rate": 4.634845706955864e-05, "loss": 0.8664027452468872, "step": 3379 }, { "epoch": 4.147239263803681, "grad_norm": 0.23650740087032318, "learning_rate": 4.6345844797448946e-05, "loss": 0.7353291511535645, "step": 3380 }, { "epoch": 4.148466257668711, "grad_norm": 0.23477010428905487, "learning_rate": 4.6343231664949126e-05, "loss": 0.811098575592041, "step": 3381 }, { "epoch": 4.149693251533742, "grad_norm": 0.2025481015443802, "learning_rate": 4.634061767216451e-05, "loss": 0.7158323526382446, "step": 3382 }, { "epoch": 4.150920245398773, "grad_norm": 0.26021808385849, "learning_rate": 4.633800281920046e-05, "loss": 0.7277798056602478, "step": 3383 }, { "epoch": 4.152147239263804, "grad_norm": 0.2725154459476471, "learning_rate": 4.6335387106162374e-05, "loss": 0.7558757066726685, "step": 3384 }, { "epoch": 4.153374233128835, "grad_norm": 0.2933405935764313, "learning_rate": 4.633277053315569e-05, "loss": 0.587714433670044, "step": 3385 }, { "epoch": 4.154601226993865, "grad_norm": 0.22193461656570435, "learning_rate": 4.6330153100285866e-05, "loss": 0.8627781867980957, "step": 3386 }, { "epoch": 4.155828220858895, "grad_norm": 0.24636738002300262, "learning_rate": 4.6327534807658403e-05, "loss": 0.8199740648269653, "step": 3387 }, { "epoch": 4.157055214723926, "grad_norm": 0.228979229927063, "learning_rate": 4.632491565537884e-05, "loss": 0.7882550954818726, "step": 3388 }, { "epoch": 4.1582822085889575, "grad_norm": 0.21277929842472076, "learning_rate": 4.632229564355275e-05, "loss": 0.8538118600845337, "step": 3389 }, { "epoch": 4.159509202453988, "grad_norm": 0.22793729603290558, "learning_rate": 4.631967477228574e-05, "loss": 0.8142079710960388, "step": 3390 }, { "epoch": 4.160736196319019, "grad_norm": 0.22055907547473907, "learning_rate": 4.6317053041683435e-05, "loss": 0.8149504661560059, "step": 3391 }, { "epoch": 4.161963190184049, "grad_norm": 0.20923450589179993, "learning_rate": 4.631443045185153e-05, "loss": 0.8021618127822876, "step": 3392 }, { "epoch": 4.16319018404908, "grad_norm": 0.19840246438980103, "learning_rate": 4.631180700289572e-05, "loss": 0.7848821878433228, "step": 3393 }, { "epoch": 4.16441717791411, "grad_norm": 0.30346405506134033, "learning_rate": 4.630918269492175e-05, "loss": 0.7624876499176025, "step": 3394 }, { "epoch": 4.1656441717791415, "grad_norm": 0.18636155128479004, "learning_rate": 4.63065575280354e-05, "loss": 0.8041031360626221, "step": 3395 }, { "epoch": 4.166871165644172, "grad_norm": 0.26466333866119385, "learning_rate": 4.630393150234249e-05, "loss": 0.6574395895004272, "step": 3396 }, { "epoch": 4.168098159509203, "grad_norm": 0.2151612490415573, "learning_rate": 4.630130461794886e-05, "loss": 0.8898758292198181, "step": 3397 }, { "epoch": 4.169325153374233, "grad_norm": 0.20211422443389893, "learning_rate": 4.6298676874960395e-05, "loss": 0.8773707747459412, "step": 3398 }, { "epoch": 4.170552147239264, "grad_norm": 0.25087419152259827, "learning_rate": 4.629604827348301e-05, "loss": 0.8180067539215088, "step": 3399 }, { "epoch": 4.171779141104294, "grad_norm": 0.22456562519073486, "learning_rate": 4.6293418813622656e-05, "loss": 0.8728682994842529, "step": 3400 }, { "epoch": 4.1730061349693255, "grad_norm": 0.22729788720607758, "learning_rate": 4.629078849548533e-05, "loss": 0.7430281043052673, "step": 3401 }, { "epoch": 4.174233128834356, "grad_norm": 0.22945523262023926, "learning_rate": 4.628815731917703e-05, "loss": 0.7524617910385132, "step": 3402 }, { "epoch": 4.175460122699387, "grad_norm": 0.20275726914405823, "learning_rate": 4.6285525284803835e-05, "loss": 0.8836286067962646, "step": 3403 }, { "epoch": 4.176687116564417, "grad_norm": 0.32598555088043213, "learning_rate": 4.628289239247181e-05, "loss": 0.6390552520751953, "step": 3404 }, { "epoch": 4.177914110429448, "grad_norm": 0.19471901655197144, "learning_rate": 4.62802586422871e-05, "loss": 0.8375537395477295, "step": 3405 }, { "epoch": 4.179141104294478, "grad_norm": 0.2504122257232666, "learning_rate": 4.627762403435586e-05, "loss": 0.7611386775970459, "step": 3406 }, { "epoch": 4.1803680981595095, "grad_norm": 0.2700304090976715, "learning_rate": 4.6274988568784285e-05, "loss": 0.7668710947036743, "step": 3407 }, { "epoch": 4.18159509202454, "grad_norm": 0.24004733562469482, "learning_rate": 4.627235224567858e-05, "loss": 0.7899552583694458, "step": 3408 }, { "epoch": 4.182822085889571, "grad_norm": 0.2294309139251709, "learning_rate": 4.626971506514505e-05, "loss": 0.771630585193634, "step": 3409 }, { "epoch": 4.184049079754601, "grad_norm": 0.22072556614875793, "learning_rate": 4.626707702728996e-05, "loss": 0.7444704174995422, "step": 3410 }, { "epoch": 4.185276073619632, "grad_norm": 0.2347823828458786, "learning_rate": 4.626443813221964e-05, "loss": 0.80430006980896, "step": 3411 }, { "epoch": 4.186503067484662, "grad_norm": 0.20850086212158203, "learning_rate": 4.626179838004048e-05, "loss": 0.9257175922393799, "step": 3412 }, { "epoch": 4.1877300613496935, "grad_norm": 0.26998502016067505, "learning_rate": 4.625915777085887e-05, "loss": 0.7144569158554077, "step": 3413 }, { "epoch": 4.188957055214724, "grad_norm": 0.22110767662525177, "learning_rate": 4.625651630478124e-05, "loss": 0.8179722428321838, "step": 3414 }, { "epoch": 4.190184049079755, "grad_norm": 0.22609201073646545, "learning_rate": 4.6253873981914054e-05, "loss": 0.7959173321723938, "step": 3415 }, { "epoch": 4.191411042944785, "grad_norm": 0.26535919308662415, "learning_rate": 4.625123080236383e-05, "loss": 0.7858412265777588, "step": 3416 }, { "epoch": 4.192638036809816, "grad_norm": 0.1883682906627655, "learning_rate": 4.6248586766237115e-05, "loss": 0.9134241342544556, "step": 3417 }, { "epoch": 4.193865030674846, "grad_norm": 0.22819115221500397, "learning_rate": 4.624594187364045e-05, "loss": 0.7911913990974426, "step": 3418 }, { "epoch": 4.1950920245398775, "grad_norm": 0.24664470553398132, "learning_rate": 4.624329612468048e-05, "loss": 0.6888760328292847, "step": 3419 }, { "epoch": 4.196319018404908, "grad_norm": 0.197964146733284, "learning_rate": 4.6240649519463826e-05, "loss": 0.7473239898681641, "step": 3420 }, { "epoch": 4.197546012269939, "grad_norm": 0.24518238008022308, "learning_rate": 4.623800205809717e-05, "loss": 0.9570253491401672, "step": 3421 }, { "epoch": 4.198773006134969, "grad_norm": 0.23463967442512512, "learning_rate": 4.623535374068722e-05, "loss": 0.8411023020744324, "step": 3422 }, { "epoch": 4.2, "grad_norm": 0.22347129881381989, "learning_rate": 4.623270456734073e-05, "loss": 0.7775678634643555, "step": 3423 }, { "epoch": 4.20122699386503, "grad_norm": 0.21297913789749146, "learning_rate": 4.6230054538164474e-05, "loss": 0.7091910243034363, "step": 3424 }, { "epoch": 4.2024539877300615, "grad_norm": 0.19610898196697235, "learning_rate": 4.622740365326527e-05, "loss": 0.8042477369308472, "step": 3425 }, { "epoch": 4.203680981595092, "grad_norm": 0.20137667655944824, "learning_rate": 4.622475191274997e-05, "loss": 0.8347076177597046, "step": 3426 }, { "epoch": 4.204907975460123, "grad_norm": 0.23948007822036743, "learning_rate": 4.6222099316725456e-05, "loss": 0.7135329246520996, "step": 3427 }, { "epoch": 4.206134969325153, "grad_norm": 0.23171953856945038, "learning_rate": 4.621944586529864e-05, "loss": 0.8609417676925659, "step": 3428 }, { "epoch": 4.207361963190184, "grad_norm": 0.21896931529045105, "learning_rate": 4.621679155857648e-05, "loss": 0.6821650266647339, "step": 3429 }, { "epoch": 4.208588957055214, "grad_norm": 0.21129214763641357, "learning_rate": 4.621413639666598e-05, "loss": 0.8233480453491211, "step": 3430 }, { "epoch": 4.2098159509202455, "grad_norm": 0.2098982334136963, "learning_rate": 4.6211480379674124e-05, "loss": 0.8749401569366455, "step": 3431 }, { "epoch": 4.211042944785276, "grad_norm": 0.26778239011764526, "learning_rate": 4.6208823507708005e-05, "loss": 0.7511553168296814, "step": 3432 }, { "epoch": 4.212269938650307, "grad_norm": 0.21670463681221008, "learning_rate": 4.62061657808747e-05, "loss": 0.8220174312591553, "step": 3433 }, { "epoch": 4.213496932515337, "grad_norm": 0.24230244755744934, "learning_rate": 4.6203507199281324e-05, "loss": 0.7695741653442383, "step": 3434 }, { "epoch": 4.214723926380368, "grad_norm": 0.22558240592479706, "learning_rate": 4.620084776303506e-05, "loss": 0.7095448970794678, "step": 3435 }, { "epoch": 4.215950920245398, "grad_norm": 0.2023901790380478, "learning_rate": 4.619818747224308e-05, "loss": 0.8207946419715881, "step": 3436 }, { "epoch": 4.2171779141104295, "grad_norm": 0.22174666821956635, "learning_rate": 4.619552632701263e-05, "loss": 0.8906105756759644, "step": 3437 }, { "epoch": 4.21840490797546, "grad_norm": 0.24096067249774933, "learning_rate": 4.6192864327450955e-05, "loss": 0.7707874178886414, "step": 3438 }, { "epoch": 4.219631901840491, "grad_norm": 0.23862707614898682, "learning_rate": 4.619020147366537e-05, "loss": 0.8358721733093262, "step": 3439 }, { "epoch": 4.220858895705521, "grad_norm": 0.27549731731414795, "learning_rate": 4.61875377657632e-05, "loss": 0.782272458076477, "step": 3440 }, { "epoch": 4.222085889570552, "grad_norm": 0.23786798119544983, "learning_rate": 4.6184873203851806e-05, "loss": 0.7323502898216248, "step": 3441 }, { "epoch": 4.223312883435582, "grad_norm": 0.2474987804889679, "learning_rate": 4.61822077880386e-05, "loss": 0.7315883636474609, "step": 3442 }, { "epoch": 4.2245398773006135, "grad_norm": 0.27754709124565125, "learning_rate": 4.6179541518431014e-05, "loss": 0.7797079086303711, "step": 3443 }, { "epoch": 4.225766871165644, "grad_norm": 0.2721702754497528, "learning_rate": 4.617687439513651e-05, "loss": 0.7353920340538025, "step": 3444 }, { "epoch": 4.226993865030675, "grad_norm": 0.20212097465991974, "learning_rate": 4.61742064182626e-05, "loss": 0.9899324178695679, "step": 3445 }, { "epoch": 4.228220858895705, "grad_norm": 0.21328392624855042, "learning_rate": 4.617153758791681e-05, "loss": 0.7491623163223267, "step": 3446 }, { "epoch": 4.229447852760736, "grad_norm": 0.2516477704048157, "learning_rate": 4.616886790420674e-05, "loss": 0.7091597318649292, "step": 3447 }, { "epoch": 4.230674846625767, "grad_norm": 0.24328291416168213, "learning_rate": 4.6166197367239974e-05, "loss": 0.7562169432640076, "step": 3448 }, { "epoch": 4.2319018404907975, "grad_norm": 0.25265949964523315, "learning_rate": 4.616352597712416e-05, "loss": 0.7916821241378784, "step": 3449 }, { "epoch": 4.233128834355828, "grad_norm": 0.23367539048194885, "learning_rate": 4.616085373396697e-05, "loss": 0.8356384038925171, "step": 3450 }, { "epoch": 4.234355828220859, "grad_norm": 0.19523388147354126, "learning_rate": 4.615818063787611e-05, "loss": 1.002617597579956, "step": 3451 }, { "epoch": 4.23558282208589, "grad_norm": 0.23518207669258118, "learning_rate": 4.615550668895935e-05, "loss": 0.6971588730812073, "step": 3452 }, { "epoch": 4.23680981595092, "grad_norm": 0.2423889935016632, "learning_rate": 4.615283188732445e-05, "loss": 0.8617277145385742, "step": 3453 }, { "epoch": 4.238036809815951, "grad_norm": 0.23847417533397675, "learning_rate": 4.6150156233079225e-05, "loss": 0.8206027150154114, "step": 3454 }, { "epoch": 4.2392638036809815, "grad_norm": 0.21428366005420685, "learning_rate": 4.614747972633152e-05, "loss": 0.7814315557479858, "step": 3455 }, { "epoch": 4.240490797546013, "grad_norm": 0.26165685057640076, "learning_rate": 4.614480236718922e-05, "loss": 0.756369948387146, "step": 3456 }, { "epoch": 4.241717791411043, "grad_norm": 0.24135041236877441, "learning_rate": 4.6142124155760244e-05, "loss": 0.9175868034362793, "step": 3457 }, { "epoch": 4.242944785276074, "grad_norm": 0.21999219059944153, "learning_rate": 4.6139445092152554e-05, "loss": 0.9298325777053833, "step": 3458 }, { "epoch": 4.244171779141104, "grad_norm": 0.26383763551712036, "learning_rate": 4.6136765176474117e-05, "loss": 0.7697503566741943, "step": 3459 }, { "epoch": 4.245398773006135, "grad_norm": 0.22078707814216614, "learning_rate": 4.613408440883295e-05, "loss": 0.9313132166862488, "step": 3460 }, { "epoch": 4.2466257668711656, "grad_norm": 0.2795831561088562, "learning_rate": 4.613140278933714e-05, "loss": 0.6976619958877563, "step": 3461 }, { "epoch": 4.247852760736197, "grad_norm": 0.20707666873931885, "learning_rate": 4.612872031809473e-05, "loss": 0.8040269017219543, "step": 3462 }, { "epoch": 4.249079754601227, "grad_norm": 0.241266131401062, "learning_rate": 4.612603699521387e-05, "loss": 0.851594090461731, "step": 3463 }, { "epoch": 4.250306748466258, "grad_norm": 0.27361804246902466, "learning_rate": 4.6123352820802724e-05, "loss": 0.6070040464401245, "step": 3464 }, { "epoch": 4.251533742331288, "grad_norm": 0.1903499811887741, "learning_rate": 4.612066779496946e-05, "loss": 0.819622278213501, "step": 3465 }, { "epoch": 4.252760736196319, "grad_norm": 0.2394132763147354, "learning_rate": 4.611798191782232e-05, "loss": 0.7220662832260132, "step": 3466 }, { "epoch": 4.25398773006135, "grad_norm": 0.26811057329177856, "learning_rate": 4.6115295189469556e-05, "loss": 0.7541800737380981, "step": 3467 }, { "epoch": 4.255214723926381, "grad_norm": 0.2057024985551834, "learning_rate": 4.611260761001946e-05, "loss": 0.8449641466140747, "step": 3468 }, { "epoch": 4.256441717791411, "grad_norm": 0.26699763536453247, "learning_rate": 4.610991917958037e-05, "loss": 0.6331000328063965, "step": 3469 }, { "epoch": 4.257668711656442, "grad_norm": 0.20228375494480133, "learning_rate": 4.610722989826066e-05, "loss": 0.7861304879188538, "step": 3470 }, { "epoch": 4.258895705521472, "grad_norm": 0.2731303870677948, "learning_rate": 4.6104539766168696e-05, "loss": 0.7516647577285767, "step": 3471 }, { "epoch": 4.260122699386503, "grad_norm": 0.2599030137062073, "learning_rate": 4.610184878341293e-05, "loss": 0.7017146348953247, "step": 3472 }, { "epoch": 4.261349693251534, "grad_norm": 0.2311665415763855, "learning_rate": 4.609915695010183e-05, "loss": 0.7953031063079834, "step": 3473 }, { "epoch": 4.262576687116565, "grad_norm": 0.2191152572631836, "learning_rate": 4.609646426634388e-05, "loss": 0.804379940032959, "step": 3474 }, { "epoch": 4.263803680981595, "grad_norm": 0.24761594831943512, "learning_rate": 4.609377073224763e-05, "loss": 0.7962141036987305, "step": 3475 }, { "epoch": 4.265030674846626, "grad_norm": 0.19299714267253876, "learning_rate": 4.609107634792164e-05, "loss": 0.9229481220245361, "step": 3476 }, { "epoch": 4.266257668711656, "grad_norm": 0.21336433291435242, "learning_rate": 4.608838111347452e-05, "loss": 0.8569420576095581, "step": 3477 }, { "epoch": 4.267484662576687, "grad_norm": 0.2217307984828949, "learning_rate": 4.60856850290149e-05, "loss": 0.7246884107589722, "step": 3478 }, { "epoch": 4.268711656441718, "grad_norm": 0.20737938582897186, "learning_rate": 4.608298809465146e-05, "loss": 0.8971391320228577, "step": 3479 }, { "epoch": 4.269938650306749, "grad_norm": 0.30771222710609436, "learning_rate": 4.608029031049289e-05, "loss": 0.7239311933517456, "step": 3480 }, { "epoch": 4.271165644171779, "grad_norm": 0.2463160753250122, "learning_rate": 4.607759167664795e-05, "loss": 0.7998148798942566, "step": 3481 }, { "epoch": 4.27239263803681, "grad_norm": 0.2928082346916199, "learning_rate": 4.6074892193225406e-05, "loss": 0.7247143387794495, "step": 3482 }, { "epoch": 4.27361963190184, "grad_norm": 0.17442181706428528, "learning_rate": 4.607219186033406e-05, "loss": 0.8692120313644409, "step": 3483 }, { "epoch": 4.274846625766871, "grad_norm": 0.19932422041893005, "learning_rate": 4.606949067808276e-05, "loss": 0.8523236513137817, "step": 3484 }, { "epoch": 4.276073619631902, "grad_norm": 0.2099640965461731, "learning_rate": 4.606678864658038e-05, "loss": 0.7903527617454529, "step": 3485 }, { "epoch": 4.277300613496933, "grad_norm": 0.2405581772327423, "learning_rate": 4.6064085765935835e-05, "loss": 0.8307009339332581, "step": 3486 }, { "epoch": 4.278527607361963, "grad_norm": 0.26838597655296326, "learning_rate": 4.6061382036258075e-05, "loss": 0.7456153035163879, "step": 3487 }, { "epoch": 4.279754601226994, "grad_norm": 0.20539790391921997, "learning_rate": 4.605867745765607e-05, "loss": 0.7411175966262817, "step": 3488 }, { "epoch": 4.280981595092024, "grad_norm": 0.1788395643234253, "learning_rate": 4.605597203023885e-05, "loss": 1.0278964042663574, "step": 3489 }, { "epoch": 4.282208588957055, "grad_norm": 0.26232606172561646, "learning_rate": 4.605326575411544e-05, "loss": 0.6591633558273315, "step": 3490 }, { "epoch": 4.283435582822086, "grad_norm": 0.2536403238773346, "learning_rate": 4.605055862939493e-05, "loss": 0.7984484434127808, "step": 3491 }, { "epoch": 4.284662576687117, "grad_norm": 0.22330085933208466, "learning_rate": 4.604785065618644e-05, "loss": 0.871733546257019, "step": 3492 }, { "epoch": 4.285889570552147, "grad_norm": 0.1982170194387436, "learning_rate": 4.604514183459912e-05, "loss": 0.7143889665603638, "step": 3493 }, { "epoch": 4.287116564417178, "grad_norm": 0.2482047826051712, "learning_rate": 4.604243216474217e-05, "loss": 0.8402642011642456, "step": 3494 }, { "epoch": 4.288343558282208, "grad_norm": 0.1940820813179016, "learning_rate": 4.6039721646724776e-05, "loss": 0.8077747821807861, "step": 3495 }, { "epoch": 4.289570552147239, "grad_norm": 0.24012306332588196, "learning_rate": 4.6037010280656216e-05, "loss": 0.8554771542549133, "step": 3496 }, { "epoch": 4.29079754601227, "grad_norm": 0.23802566528320312, "learning_rate": 4.6034298066645774e-05, "loss": 0.6402826905250549, "step": 3497 }, { "epoch": 4.292024539877301, "grad_norm": 0.2350417971611023, "learning_rate": 4.6031585004802765e-05, "loss": 0.7932632565498352, "step": 3498 }, { "epoch": 4.293251533742331, "grad_norm": 0.24314570426940918, "learning_rate": 4.6028871095236546e-05, "loss": 0.700215220451355, "step": 3499 }, { "epoch": 4.294478527607362, "grad_norm": 0.2378229945898056, "learning_rate": 4.602615633805652e-05, "loss": 0.7408008575439453, "step": 3500 }, { "epoch": 4.295705521472392, "grad_norm": 0.23270782828330994, "learning_rate": 4.602344073337209e-05, "loss": 0.8706061244010925, "step": 3501 }, { "epoch": 4.296932515337423, "grad_norm": 0.2215019166469574, "learning_rate": 4.602072428129273e-05, "loss": 0.9453461170196533, "step": 3502 }, { "epoch": 4.298159509202454, "grad_norm": 0.26687467098236084, "learning_rate": 4.601800698192792e-05, "loss": 0.6331028938293457, "step": 3503 }, { "epoch": 4.299386503067485, "grad_norm": 0.21715505421161652, "learning_rate": 4.601528883538721e-05, "loss": 0.7682393789291382, "step": 3504 }, { "epoch": 4.300613496932515, "grad_norm": 0.2818583846092224, "learning_rate": 4.6012569841780126e-05, "loss": 0.7385059595108032, "step": 3505 }, { "epoch": 4.301840490797546, "grad_norm": 0.24951985478401184, "learning_rate": 4.600985000121629e-05, "loss": 0.7358300685882568, "step": 3506 }, { "epoch": 4.303067484662577, "grad_norm": 0.198475643992424, "learning_rate": 4.600712931380532e-05, "loss": 0.9403992891311646, "step": 3507 }, { "epoch": 4.304294478527607, "grad_norm": 0.23263852298259735, "learning_rate": 4.6004407779656886e-05, "loss": 0.6444194316864014, "step": 3508 }, { "epoch": 4.305521472392638, "grad_norm": 0.23291358351707458, "learning_rate": 4.600168539888068e-05, "loss": 0.7412539720535278, "step": 3509 }, { "epoch": 4.306748466257669, "grad_norm": 0.2256702035665512, "learning_rate": 4.599896217158643e-05, "loss": 0.9933730363845825, "step": 3510 }, { "epoch": 4.3079754601227, "grad_norm": 0.28697669506073, "learning_rate": 4.599623809788391e-05, "loss": 0.7450973987579346, "step": 3511 }, { "epoch": 4.30920245398773, "grad_norm": 0.20721442997455597, "learning_rate": 4.599351317788291e-05, "loss": 0.746092677116394, "step": 3512 }, { "epoch": 4.31042944785276, "grad_norm": 0.2475602924823761, "learning_rate": 4.5990787411693274e-05, "loss": 0.7786272764205933, "step": 3513 }, { "epoch": 4.311656441717791, "grad_norm": 0.2342311292886734, "learning_rate": 4.598806079942486e-05, "loss": 0.7801030278205872, "step": 3514 }, { "epoch": 4.3128834355828225, "grad_norm": 0.19459474086761475, "learning_rate": 4.598533334118759e-05, "loss": 0.8514308929443359, "step": 3515 }, { "epoch": 4.314110429447853, "grad_norm": 0.18825384974479675, "learning_rate": 4.5982605037091366e-05, "loss": 0.9383343458175659, "step": 3516 }, { "epoch": 4.315337423312884, "grad_norm": 0.2420346587896347, "learning_rate": 4.597987588724619e-05, "loss": 0.8146602511405945, "step": 3517 }, { "epoch": 4.316564417177914, "grad_norm": 0.2124072164297104, "learning_rate": 4.597714589176204e-05, "loss": 0.7682129144668579, "step": 3518 }, { "epoch": 4.317791411042945, "grad_norm": 0.23175489902496338, "learning_rate": 4.597441505074898e-05, "loss": 0.8627054691314697, "step": 3519 }, { "epoch": 4.319018404907975, "grad_norm": 0.291715532541275, "learning_rate": 4.597168336431706e-05, "loss": 0.698890745639801, "step": 3520 }, { "epoch": 4.3202453987730065, "grad_norm": 0.21605464816093445, "learning_rate": 4.596895083257641e-05, "loss": 0.8374708890914917, "step": 3521 }, { "epoch": 4.321472392638037, "grad_norm": 0.2100566178560257, "learning_rate": 4.596621745563715e-05, "loss": 0.7661963701248169, "step": 3522 }, { "epoch": 4.322699386503068, "grad_norm": 0.2595961093902588, "learning_rate": 4.596348323360946e-05, "loss": 0.721593976020813, "step": 3523 }, { "epoch": 4.323926380368098, "grad_norm": 0.24663716554641724, "learning_rate": 4.5960748166603554e-05, "loss": 0.8705881834030151, "step": 3524 }, { "epoch": 4.325153374233129, "grad_norm": 0.23256021738052368, "learning_rate": 4.595801225472968e-05, "loss": 0.730202317237854, "step": 3525 }, { "epoch": 4.326380368098159, "grad_norm": 0.19052590429782867, "learning_rate": 4.5955275498098086e-05, "loss": 0.9069713354110718, "step": 3526 }, { "epoch": 4.3276073619631905, "grad_norm": 0.2150927037000656, "learning_rate": 4.595253789681912e-05, "loss": 0.9829047918319702, "step": 3527 }, { "epoch": 4.328834355828221, "grad_norm": 0.18714770674705505, "learning_rate": 4.59497994510031e-05, "loss": 0.845508337020874, "step": 3528 }, { "epoch": 4.330061349693252, "grad_norm": 0.44555214047431946, "learning_rate": 4.594706016076041e-05, "loss": 0.8816633224487305, "step": 3529 }, { "epoch": 4.331288343558282, "grad_norm": 0.2321224808692932, "learning_rate": 4.594432002620148e-05, "loss": 0.7926952838897705, "step": 3530 }, { "epoch": 4.332515337423313, "grad_norm": 0.2692148983478546, "learning_rate": 4.594157904743674e-05, "loss": 0.7946418523788452, "step": 3531 }, { "epoch": 4.333742331288343, "grad_norm": 0.23082469403743744, "learning_rate": 4.593883722457668e-05, "loss": 0.8287723660469055, "step": 3532 }, { "epoch": 4.3349693251533745, "grad_norm": 0.24717815220355988, "learning_rate": 4.593609455773181e-05, "loss": 0.7643803358078003, "step": 3533 }, { "epoch": 4.336196319018405, "grad_norm": 0.234775111079216, "learning_rate": 4.5933351047012676e-05, "loss": 0.7497406005859375, "step": 3534 }, { "epoch": 4.337423312883436, "grad_norm": 0.22239874303340912, "learning_rate": 4.593060669252986e-05, "loss": 0.6935547590255737, "step": 3535 }, { "epoch": 4.338650306748466, "grad_norm": 0.21718576550483704, "learning_rate": 4.5927861494394e-05, "loss": 0.8677587509155273, "step": 3536 }, { "epoch": 4.339877300613497, "grad_norm": 0.19595326483249664, "learning_rate": 4.592511545271572e-05, "loss": 0.8672071695327759, "step": 3537 }, { "epoch": 4.341104294478527, "grad_norm": 0.2927446961402893, "learning_rate": 4.5922368567605714e-05, "loss": 0.7767988443374634, "step": 3538 }, { "epoch": 4.3423312883435585, "grad_norm": 0.23346517980098724, "learning_rate": 4.591962083917471e-05, "loss": 0.721664547920227, "step": 3539 }, { "epoch": 4.343558282208589, "grad_norm": 0.17632770538330078, "learning_rate": 4.591687226753345e-05, "loss": 0.8592240810394287, "step": 3540 }, { "epoch": 4.34478527607362, "grad_norm": 0.2333017736673355, "learning_rate": 4.591412285279273e-05, "loss": 0.8718945980072021, "step": 3541 }, { "epoch": 4.34601226993865, "grad_norm": 0.2381768673658371, "learning_rate": 4.5911372595063365e-05, "loss": 0.9497817754745483, "step": 3542 }, { "epoch": 4.347239263803681, "grad_norm": 0.2110477089881897, "learning_rate": 4.5908621494456215e-05, "loss": 0.67487633228302, "step": 3543 }, { "epoch": 4.348466257668711, "grad_norm": 0.25329723954200745, "learning_rate": 4.590586955108216e-05, "loss": 0.7585359215736389, "step": 3544 }, { "epoch": 4.3496932515337425, "grad_norm": 0.20941175520420074, "learning_rate": 4.590311676505213e-05, "loss": 0.888245701789856, "step": 3545 }, { "epoch": 4.350920245398773, "grad_norm": 0.25901928544044495, "learning_rate": 4.590036313647709e-05, "loss": 0.7193784713745117, "step": 3546 }, { "epoch": 4.352147239263804, "grad_norm": 0.20585817098617554, "learning_rate": 4.589760866546801e-05, "loss": 0.903052568435669, "step": 3547 }, { "epoch": 4.353374233128834, "grad_norm": 0.23663640022277832, "learning_rate": 4.589485335213593e-05, "loss": 0.7582786083221436, "step": 3548 }, { "epoch": 4.354601226993865, "grad_norm": 0.2296985685825348, "learning_rate": 4.589209719659191e-05, "loss": 0.8358009457588196, "step": 3549 }, { "epoch": 4.355828220858895, "grad_norm": 0.23065818846225739, "learning_rate": 4.588934019894703e-05, "loss": 0.6936711072921753, "step": 3550 }, { "epoch": 4.3570552147239265, "grad_norm": 0.2231329381465912, "learning_rate": 4.5886582359312426e-05, "loss": 0.7561604976654053, "step": 3551 }, { "epoch": 4.358282208588957, "grad_norm": 0.24836450815200806, "learning_rate": 4.588382367779927e-05, "loss": 0.8822187185287476, "step": 3552 }, { "epoch": 4.359509202453988, "grad_norm": 0.24438951909542084, "learning_rate": 4.588106415451873e-05, "loss": 0.7979847192764282, "step": 3553 }, { "epoch": 4.360736196319018, "grad_norm": 0.22296391427516937, "learning_rate": 4.587830378958205e-05, "loss": 0.8139451742172241, "step": 3554 }, { "epoch": 4.361963190184049, "grad_norm": 0.21562686562538147, "learning_rate": 4.5875542583100496e-05, "loss": 0.8280532360076904, "step": 3555 }, { "epoch": 4.363190184049079, "grad_norm": 0.2411998212337494, "learning_rate": 4.587278053518536e-05, "loss": 0.8204846978187561, "step": 3556 }, { "epoch": 4.3644171779141105, "grad_norm": 0.20432816445827484, "learning_rate": 4.587001764594796e-05, "loss": 0.931971549987793, "step": 3557 }, { "epoch": 4.365644171779141, "grad_norm": 0.2207249253988266, "learning_rate": 4.586725391549968e-05, "loss": 0.7961596250534058, "step": 3558 }, { "epoch": 4.366871165644172, "grad_norm": 0.2303059697151184, "learning_rate": 4.586448934395191e-05, "loss": 0.7496914863586426, "step": 3559 }, { "epoch": 4.368098159509202, "grad_norm": 0.24401357769966125, "learning_rate": 4.586172393141609e-05, "loss": 0.7698212265968323, "step": 3560 }, { "epoch": 4.369325153374233, "grad_norm": 0.2080630213022232, "learning_rate": 4.5858957678003665e-05, "loss": 0.709854781627655, "step": 3561 }, { "epoch": 4.370552147239263, "grad_norm": 0.2877208888530731, "learning_rate": 4.5856190583826155e-05, "loss": 0.8406080603599548, "step": 3562 }, { "epoch": 4.3717791411042946, "grad_norm": 0.2726186215877533, "learning_rate": 4.585342264899508e-05, "loss": 0.6434463262557983, "step": 3563 }, { "epoch": 4.373006134969325, "grad_norm": 0.2310725450515747, "learning_rate": 4.5850653873622026e-05, "loss": 0.7627007961273193, "step": 3564 }, { "epoch": 4.374233128834356, "grad_norm": 0.19788026809692383, "learning_rate": 4.5847884257818575e-05, "loss": 0.8547815084457397, "step": 3565 }, { "epoch": 4.375460122699386, "grad_norm": 0.24356244504451752, "learning_rate": 4.5845113801696364e-05, "loss": 0.9452491402626038, "step": 3566 }, { "epoch": 4.376687116564417, "grad_norm": 0.24058803915977478, "learning_rate": 4.584234250536708e-05, "loss": 1.06685471534729, "step": 3567 }, { "epoch": 4.3779141104294474, "grad_norm": 0.20605577528476715, "learning_rate": 4.58395703689424e-05, "loss": 0.799628734588623, "step": 3568 }, { "epoch": 4.379141104294479, "grad_norm": 0.17660772800445557, "learning_rate": 4.583679739253408e-05, "loss": 0.8963965177536011, "step": 3569 }, { "epoch": 4.38036809815951, "grad_norm": 0.23732540011405945, "learning_rate": 4.583402357625389e-05, "loss": 0.7247959971427917, "step": 3570 }, { "epoch": 4.38159509202454, "grad_norm": 0.24744272232055664, "learning_rate": 4.5831248920213626e-05, "loss": 0.8066186904907227, "step": 3571 }, { "epoch": 4.38282208588957, "grad_norm": 0.21191994845867157, "learning_rate": 4.582847342452513e-05, "loss": 0.8219622373580933, "step": 3572 }, { "epoch": 4.384049079754601, "grad_norm": 0.23170071840286255, "learning_rate": 4.582569708930028e-05, "loss": 0.7968897223472595, "step": 3573 }, { "epoch": 4.385276073619632, "grad_norm": 0.2636421024799347, "learning_rate": 4.5822919914650974e-05, "loss": 0.6958600282669067, "step": 3574 }, { "epoch": 4.386503067484663, "grad_norm": 0.2363457977771759, "learning_rate": 4.582014190068915e-05, "loss": 0.788109540939331, "step": 3575 }, { "epoch": 4.387730061349693, "grad_norm": 0.2722374498844147, "learning_rate": 4.581736304752679e-05, "loss": 0.6795264482498169, "step": 3576 }, { "epoch": 4.388957055214724, "grad_norm": 0.350571870803833, "learning_rate": 4.581458335527591e-05, "loss": 0.6884638667106628, "step": 3577 }, { "epoch": 4.390184049079755, "grad_norm": 0.21468998491764069, "learning_rate": 4.5811802824048525e-05, "loss": 0.729739785194397, "step": 3578 }, { "epoch": 4.391411042944785, "grad_norm": 0.1877812296152115, "learning_rate": 4.580902145395673e-05, "loss": 0.9069937467575073, "step": 3579 }, { "epoch": 4.392638036809816, "grad_norm": 0.20873042941093445, "learning_rate": 4.580623924511263e-05, "loss": 0.7128190398216248, "step": 3580 }, { "epoch": 4.393865030674847, "grad_norm": 0.26364779472351074, "learning_rate": 4.580345619762837e-05, "loss": 0.6659976840019226, "step": 3581 }, { "epoch": 4.395092024539878, "grad_norm": 0.24431785941123962, "learning_rate": 4.5800672311616123e-05, "loss": 0.7555111646652222, "step": 3582 }, { "epoch": 4.396319018404908, "grad_norm": 0.23799443244934082, "learning_rate": 4.5797887587188096e-05, "loss": 0.7169034481048584, "step": 3583 }, { "epoch": 4.397546012269939, "grad_norm": 0.22816726565361023, "learning_rate": 4.5795102024456535e-05, "loss": 0.712560772895813, "step": 3584 }, { "epoch": 4.398773006134969, "grad_norm": 0.256673663854599, "learning_rate": 4.579231562353373e-05, "loss": 0.5092991590499878, "step": 3585 }, { "epoch": 4.4, "grad_norm": 0.21464693546295166, "learning_rate": 4.5789528384531975e-05, "loss": 0.7428768873214722, "step": 3586 }, { "epoch": 4.401226993865031, "grad_norm": 0.2559557855129242, "learning_rate": 4.5786740307563636e-05, "loss": 0.686142086982727, "step": 3587 }, { "epoch": 4.402453987730062, "grad_norm": 0.2760314643383026, "learning_rate": 4.5783951392741077e-05, "loss": 0.6090893745422363, "step": 3588 }, { "epoch": 4.403680981595092, "grad_norm": 0.23607411980628967, "learning_rate": 4.578116164017671e-05, "loss": 0.8239355087280273, "step": 3589 }, { "epoch": 4.404907975460123, "grad_norm": 0.2165379524230957, "learning_rate": 4.577837104998299e-05, "loss": 0.64925217628479, "step": 3590 }, { "epoch": 4.406134969325153, "grad_norm": 0.21764104068279266, "learning_rate": 4.5775579622272394e-05, "loss": 0.7876124382019043, "step": 3591 }, { "epoch": 4.407361963190184, "grad_norm": 0.2453155517578125, "learning_rate": 4.5772787357157433e-05, "loss": 0.9231644868850708, "step": 3592 }, { "epoch": 4.408588957055215, "grad_norm": 0.2128518968820572, "learning_rate": 4.576999425475066e-05, "loss": 0.7022255659103394, "step": 3593 }, { "epoch": 4.409815950920246, "grad_norm": 0.19850276410579681, "learning_rate": 4.576720031516467e-05, "loss": 0.8574470281600952, "step": 3594 }, { "epoch": 4.411042944785276, "grad_norm": 0.2329694628715515, "learning_rate": 4.576440553851205e-05, "loss": 0.7718992233276367, "step": 3595 }, { "epoch": 4.412269938650307, "grad_norm": 0.22368590533733368, "learning_rate": 4.576160992490547e-05, "loss": 0.8791519403457642, "step": 3596 }, { "epoch": 4.413496932515337, "grad_norm": 0.21422290802001953, "learning_rate": 4.5758813474457606e-05, "loss": 0.853071928024292, "step": 3597 }, { "epoch": 4.414723926380368, "grad_norm": 0.24174533784389496, "learning_rate": 4.5756016187281184e-05, "loss": 0.7468602657318115, "step": 3598 }, { "epoch": 4.415950920245399, "grad_norm": 0.21415545046329498, "learning_rate": 4.575321806348894e-05, "loss": 0.7901430130004883, "step": 3599 }, { "epoch": 4.41717791411043, "grad_norm": 0.22358788549900055, "learning_rate": 4.5750419103193665e-05, "loss": 0.7876269221305847, "step": 3600 }, { "epoch": 4.41840490797546, "grad_norm": 0.19366581737995148, "learning_rate": 4.5747619306508183e-05, "loss": 0.9370091557502747, "step": 3601 }, { "epoch": 4.419631901840491, "grad_norm": 0.21950025856494904, "learning_rate": 4.5744818673545345e-05, "loss": 0.7369838953018188, "step": 3602 }, { "epoch": 4.420858895705521, "grad_norm": 0.23014584183692932, "learning_rate": 4.574201720441802e-05, "loss": 0.6013737916946411, "step": 3603 }, { "epoch": 4.422085889570552, "grad_norm": 0.23058216273784637, "learning_rate": 4.573921489923915e-05, "loss": 0.7731649875640869, "step": 3604 }, { "epoch": 4.423312883435583, "grad_norm": 0.24616308510303497, "learning_rate": 4.573641175812167e-05, "loss": 0.7996979355812073, "step": 3605 }, { "epoch": 4.424539877300614, "grad_norm": 0.31691399216651917, "learning_rate": 4.573360778117858e-05, "loss": 0.5891534090042114, "step": 3606 }, { "epoch": 4.425766871165644, "grad_norm": 0.20640750229358673, "learning_rate": 4.573080296852289e-05, "loss": 0.8268564939498901, "step": 3607 }, { "epoch": 4.426993865030675, "grad_norm": 0.22577263414859772, "learning_rate": 4.5727997320267655e-05, "loss": 0.801459550857544, "step": 3608 }, { "epoch": 4.428220858895705, "grad_norm": 0.23161157965660095, "learning_rate": 4.572519083652598e-05, "loss": 0.6837670207023621, "step": 3609 }, { "epoch": 4.429447852760736, "grad_norm": 0.2623501121997833, "learning_rate": 4.572238351741096e-05, "loss": 0.7348164916038513, "step": 3610 }, { "epoch": 4.430674846625767, "grad_norm": 0.19694721698760986, "learning_rate": 4.5719575363035773e-05, "loss": 0.9388207197189331, "step": 3611 }, { "epoch": 4.431901840490798, "grad_norm": 0.24787180125713348, "learning_rate": 4.571676637351359e-05, "loss": 0.7303414344787598, "step": 3612 }, { "epoch": 4.433128834355828, "grad_norm": 0.21792268753051758, "learning_rate": 4.571395654895764e-05, "loss": 0.7279622554779053, "step": 3613 }, { "epoch": 4.434355828220859, "grad_norm": 0.2687225043773651, "learning_rate": 4.5711145889481177e-05, "loss": 0.5583107471466064, "step": 3614 }, { "epoch": 4.435582822085889, "grad_norm": 0.2572178542613983, "learning_rate": 4.5708334395197496e-05, "loss": 0.8941605091094971, "step": 3615 }, { "epoch": 4.43680981595092, "grad_norm": 0.18922662734985352, "learning_rate": 4.5705522066219916e-05, "loss": 0.7394393682479858, "step": 3616 }, { "epoch": 4.438036809815951, "grad_norm": 0.22526337206363678, "learning_rate": 4.57027089026618e-05, "loss": 0.7720183730125427, "step": 3617 }, { "epoch": 4.439263803680982, "grad_norm": 0.25383466482162476, "learning_rate": 4.569989490463653e-05, "loss": 0.8111212253570557, "step": 3618 }, { "epoch": 4.440490797546012, "grad_norm": 0.27330535650253296, "learning_rate": 4.569708007225754e-05, "loss": 0.726045548915863, "step": 3619 }, { "epoch": 4.441717791411043, "grad_norm": 0.18657927215099335, "learning_rate": 4.569426440563827e-05, "loss": 1.0310649871826172, "step": 3620 }, { "epoch": 4.442944785276073, "grad_norm": 0.20370081067085266, "learning_rate": 4.5691447904892246e-05, "loss": 1.0523368120193481, "step": 3621 }, { "epoch": 4.444171779141104, "grad_norm": 0.22658458352088928, "learning_rate": 4.5688630570132954e-05, "loss": 0.8411576747894287, "step": 3622 }, { "epoch": 4.445398773006135, "grad_norm": 0.32821425795555115, "learning_rate": 4.568581240147397e-05, "loss": 0.6310772895812988, "step": 3623 }, { "epoch": 4.446625766871166, "grad_norm": 0.22507435083389282, "learning_rate": 4.56829933990289e-05, "loss": 0.8482420444488525, "step": 3624 }, { "epoch": 4.447852760736196, "grad_norm": 0.23221275210380554, "learning_rate": 4.568017356291134e-05, "loss": 0.7954868078231812, "step": 3625 }, { "epoch": 4.449079754601227, "grad_norm": 0.23192285001277924, "learning_rate": 4.567735289323496e-05, "loss": 0.7929030656814575, "step": 3626 }, { "epoch": 4.450306748466257, "grad_norm": 0.23041337728500366, "learning_rate": 4.5674531390113475e-05, "loss": 0.703667163848877, "step": 3627 }, { "epoch": 4.451533742331288, "grad_norm": 0.21109649538993835, "learning_rate": 4.567170905366059e-05, "loss": 0.7622562646865845, "step": 3628 }, { "epoch": 4.452760736196319, "grad_norm": 0.3048725426197052, "learning_rate": 4.5668885883990063e-05, "loss": 0.5053634643554688, "step": 3629 }, { "epoch": 4.45398773006135, "grad_norm": 0.22383099794387817, "learning_rate": 4.566606188121571e-05, "loss": 0.8780859708786011, "step": 3630 }, { "epoch": 4.45521472392638, "grad_norm": 0.2020450234413147, "learning_rate": 4.5663237045451335e-05, "loss": 0.8285715579986572, "step": 3631 }, { "epoch": 4.456441717791411, "grad_norm": 0.24172624945640564, "learning_rate": 4.566041137681081e-05, "loss": 0.7883837223052979, "step": 3632 }, { "epoch": 4.457668711656442, "grad_norm": 0.23547989130020142, "learning_rate": 4.565758487540802e-05, "loss": 0.8081951141357422, "step": 3633 }, { "epoch": 4.458895705521472, "grad_norm": 0.24832794070243835, "learning_rate": 4.5654757541356915e-05, "loss": 0.731653094291687, "step": 3634 }, { "epoch": 4.460122699386503, "grad_norm": 0.2335531860589981, "learning_rate": 4.565192937477143e-05, "loss": 0.748865008354187, "step": 3635 }, { "epoch": 4.461349693251534, "grad_norm": 0.27174112200737, "learning_rate": 4.564910037576558e-05, "loss": 0.6748929023742676, "step": 3636 }, { "epoch": 4.462576687116565, "grad_norm": 0.28900158405303955, "learning_rate": 4.5646270544453386e-05, "loss": 0.7754471302032471, "step": 3637 }, { "epoch": 4.463803680981595, "grad_norm": 0.2669828236103058, "learning_rate": 4.564343988094891e-05, "loss": 0.8005372285842896, "step": 3638 }, { "epoch": 4.465030674846625, "grad_norm": 0.18855169415473938, "learning_rate": 4.564060838536625e-05, "loss": 0.9453669190406799, "step": 3639 }, { "epoch": 4.466257668711656, "grad_norm": 0.2313176393508911, "learning_rate": 4.563777605781954e-05, "loss": 0.8428722620010376, "step": 3640 }, { "epoch": 4.4674846625766875, "grad_norm": 0.2486836016178131, "learning_rate": 4.5634942898422925e-05, "loss": 0.777513325214386, "step": 3641 }, { "epoch": 4.468711656441718, "grad_norm": 0.22787337005138397, "learning_rate": 4.563210890729063e-05, "loss": 0.7725852727890015, "step": 3642 }, { "epoch": 4.469938650306749, "grad_norm": 0.28778693079948425, "learning_rate": 4.562927408453685e-05, "loss": 0.574951708316803, "step": 3643 }, { "epoch": 4.471165644171779, "grad_norm": 0.22597508132457733, "learning_rate": 4.562643843027589e-05, "loss": 0.6531752347946167, "step": 3644 }, { "epoch": 4.47239263803681, "grad_norm": 0.2142573893070221, "learning_rate": 4.5623601944622016e-05, "loss": 0.8840475082397461, "step": 3645 }, { "epoch": 4.47361963190184, "grad_norm": 0.23326285183429718, "learning_rate": 4.562076462768956e-05, "loss": 0.8702311515808105, "step": 3646 }, { "epoch": 4.4748466257668715, "grad_norm": 0.26945844292640686, "learning_rate": 4.56179264795929e-05, "loss": 0.7020714282989502, "step": 3647 }, { "epoch": 4.476073619631902, "grad_norm": 0.22884762287139893, "learning_rate": 4.561508750044643e-05, "loss": 0.7774190902709961, "step": 3648 }, { "epoch": 4.477300613496933, "grad_norm": 0.2402324229478836, "learning_rate": 4.561224769036459e-05, "loss": 0.7465567588806152, "step": 3649 }, { "epoch": 4.478527607361963, "grad_norm": 0.25581085681915283, "learning_rate": 4.5609407049461815e-05, "loss": 0.6393306255340576, "step": 3650 }, { "epoch": 4.479754601226994, "grad_norm": 0.20997148752212524, "learning_rate": 4.560656557785262e-05, "loss": 0.84447181224823, "step": 3651 }, { "epoch": 4.480981595092024, "grad_norm": 0.25130587816238403, "learning_rate": 4.560372327565155e-05, "loss": 0.7037813663482666, "step": 3652 }, { "epoch": 4.4822085889570555, "grad_norm": 0.1892334520816803, "learning_rate": 4.5600880142973155e-05, "loss": 0.8679920434951782, "step": 3653 }, { "epoch": 4.483435582822086, "grad_norm": 0.18264111876487732, "learning_rate": 4.559803617993204e-05, "loss": 0.8641389608383179, "step": 3654 }, { "epoch": 4.484662576687117, "grad_norm": 0.2522410452365875, "learning_rate": 4.5595191386642834e-05, "loss": 0.822079062461853, "step": 3655 }, { "epoch": 4.485889570552147, "grad_norm": 0.22352813184261322, "learning_rate": 4.55923457632202e-05, "loss": 0.9585291147232056, "step": 3656 }, { "epoch": 4.487116564417178, "grad_norm": 0.26900166273117065, "learning_rate": 4.558949930977884e-05, "loss": 0.7105143070220947, "step": 3657 }, { "epoch": 4.488343558282208, "grad_norm": 0.29084402322769165, "learning_rate": 4.558665202643348e-05, "loss": 0.6003997325897217, "step": 3658 }, { "epoch": 4.4895705521472395, "grad_norm": 0.23036955296993256, "learning_rate": 4.55838039132989e-05, "loss": 0.9111760258674622, "step": 3659 }, { "epoch": 4.49079754601227, "grad_norm": 0.2881357967853546, "learning_rate": 4.558095497048989e-05, "loss": 0.7372642755508423, "step": 3660 }, { "epoch": 4.492024539877301, "grad_norm": 0.29720115661621094, "learning_rate": 4.557810519812128e-05, "loss": 0.6001980304718018, "step": 3661 }, { "epoch": 4.493251533742331, "grad_norm": 0.2613126039505005, "learning_rate": 4.557525459630794e-05, "loss": 0.7991828918457031, "step": 3662 }, { "epoch": 4.494478527607362, "grad_norm": 0.22860941290855408, "learning_rate": 4.5572403165164776e-05, "loss": 0.8814657330513, "step": 3663 }, { "epoch": 4.495705521472392, "grad_norm": 0.28162240982055664, "learning_rate": 4.556955090480671e-05, "loss": 0.5357230305671692, "step": 3664 }, { "epoch": 4.4969325153374236, "grad_norm": 0.22142739593982697, "learning_rate": 4.5566697815348715e-05, "loss": 0.7818315029144287, "step": 3665 }, { "epoch": 4.498159509202454, "grad_norm": 0.22031095623970032, "learning_rate": 4.556384389690578e-05, "loss": 0.7506803870201111, "step": 3666 }, { "epoch": 4.499386503067485, "grad_norm": 0.22948309779167175, "learning_rate": 4.556098914959296e-05, "loss": 0.602472186088562, "step": 3667 }, { "epoch": 4.500613496932515, "grad_norm": 0.19840429723262787, "learning_rate": 4.55581335735253e-05, "loss": 0.8641633987426758, "step": 3668 }, { "epoch": 4.501840490797546, "grad_norm": 0.22241166234016418, "learning_rate": 4.555527716881791e-05, "loss": 0.750194251537323, "step": 3669 }, { "epoch": 4.5030674846625764, "grad_norm": 0.19015109539031982, "learning_rate": 4.555241993558593e-05, "loss": 0.8461611270904541, "step": 3670 }, { "epoch": 4.504294478527608, "grad_norm": 0.22754693031311035, "learning_rate": 4.554956187394452e-05, "loss": 0.9397517442703247, "step": 3671 }, { "epoch": 4.505521472392638, "grad_norm": 0.2586202323436737, "learning_rate": 4.554670298400887e-05, "loss": 0.801832914352417, "step": 3672 }, { "epoch": 4.506748466257669, "grad_norm": 0.2320723831653595, "learning_rate": 4.554384326589424e-05, "loss": 0.7439591288566589, "step": 3673 }, { "epoch": 4.507975460122699, "grad_norm": 0.2545829117298126, "learning_rate": 4.5540982719715866e-05, "loss": 0.7658528089523315, "step": 3674 }, { "epoch": 4.50920245398773, "grad_norm": 0.2230520397424698, "learning_rate": 4.553812134558907e-05, "loss": 0.9000067710876465, "step": 3675 }, { "epoch": 4.5104294478527605, "grad_norm": 0.2287236452102661, "learning_rate": 4.553525914362917e-05, "loss": 0.9254120588302612, "step": 3676 }, { "epoch": 4.511656441717792, "grad_norm": 0.2146894335746765, "learning_rate": 4.553239611395155e-05, "loss": 0.7136527895927429, "step": 3677 }, { "epoch": 4.512883435582822, "grad_norm": 0.20067092776298523, "learning_rate": 4.55295322566716e-05, "loss": 0.8538784980773926, "step": 3678 }, { "epoch": 4.514110429447853, "grad_norm": 0.2209358960390091, "learning_rate": 4.552666757190476e-05, "loss": 0.7918183207511902, "step": 3679 }, { "epoch": 4.515337423312883, "grad_norm": 0.20534129440784454, "learning_rate": 4.5523802059766494e-05, "loss": 0.8258259296417236, "step": 3680 }, { "epoch": 4.516564417177914, "grad_norm": 0.23562316596508026, "learning_rate": 4.5520935720372305e-05, "loss": 0.6368833184242249, "step": 3681 }, { "epoch": 4.5177914110429445, "grad_norm": 0.19081182777881622, "learning_rate": 4.5518068553837723e-05, "loss": 0.8584858179092407, "step": 3682 }, { "epoch": 4.519018404907976, "grad_norm": 0.2219909429550171, "learning_rate": 4.551520056027831e-05, "loss": 1.0142191648483276, "step": 3683 }, { "epoch": 4.520245398773006, "grad_norm": 0.2753700911998749, "learning_rate": 4.551233173980968e-05, "loss": 0.7082715630531311, "step": 3684 }, { "epoch": 4.521472392638037, "grad_norm": 0.21280646324157715, "learning_rate": 4.550946209254746e-05, "loss": 0.8034528493881226, "step": 3685 }, { "epoch": 4.522699386503067, "grad_norm": 0.20233051478862762, "learning_rate": 4.550659161860732e-05, "loss": 0.8552168011665344, "step": 3686 }, { "epoch": 4.523926380368098, "grad_norm": 0.26037824153900146, "learning_rate": 4.5503720318104955e-05, "loss": 0.7256996631622314, "step": 3687 }, { "epoch": 4.5251533742331285, "grad_norm": 0.2185225635766983, "learning_rate": 4.5500848191156106e-05, "loss": 0.7792422771453857, "step": 3688 }, { "epoch": 4.52638036809816, "grad_norm": 0.21785899996757507, "learning_rate": 4.5497975237876534e-05, "loss": 0.8085280656814575, "step": 3689 }, { "epoch": 4.52760736196319, "grad_norm": 0.21653856337070465, "learning_rate": 4.549510145838204e-05, "loss": 0.8390034437179565, "step": 3690 }, { "epoch": 4.528834355828221, "grad_norm": 0.23906540870666504, "learning_rate": 4.5492226852788464e-05, "loss": 0.6804565191268921, "step": 3691 }, { "epoch": 4.530061349693252, "grad_norm": 0.2602219581604004, "learning_rate": 4.548935142121167e-05, "loss": 0.8474458456039429, "step": 3692 }, { "epoch": 4.531288343558282, "grad_norm": 0.24800604581832886, "learning_rate": 4.548647516376755e-05, "loss": 0.6257781386375427, "step": 3693 }, { "epoch": 4.5325153374233125, "grad_norm": 0.2778492867946625, "learning_rate": 4.548359808057205e-05, "loss": 0.871578574180603, "step": 3694 }, { "epoch": 4.533742331288344, "grad_norm": 0.2826344668865204, "learning_rate": 4.5480720171741135e-05, "loss": 0.6038280725479126, "step": 3695 }, { "epoch": 4.534969325153375, "grad_norm": 0.2572513520717621, "learning_rate": 4.54778414373908e-05, "loss": 0.7266067266464233, "step": 3696 }, { "epoch": 4.536196319018405, "grad_norm": 0.17882908880710602, "learning_rate": 4.547496187763708e-05, "loss": 0.8072957396507263, "step": 3697 }, { "epoch": 4.537423312883435, "grad_norm": 0.31564563512802124, "learning_rate": 4.547208149259605e-05, "loss": 0.68392014503479, "step": 3698 }, { "epoch": 4.538650306748466, "grad_norm": 0.22393223643302917, "learning_rate": 4.54692002823838e-05, "loss": 0.7613935470581055, "step": 3699 }, { "epoch": 4.539877300613497, "grad_norm": 0.2684192359447479, "learning_rate": 4.546631824711646e-05, "loss": 0.645770788192749, "step": 3700 }, { "epoch": 4.541104294478528, "grad_norm": 0.21950720250606537, "learning_rate": 4.5463435386910215e-05, "loss": 0.7845755815505981, "step": 3701 }, { "epoch": 4.542331288343558, "grad_norm": 0.31071221828460693, "learning_rate": 4.546055170188124e-05, "loss": 0.5498073101043701, "step": 3702 }, { "epoch": 4.543558282208589, "grad_norm": 0.2399589717388153, "learning_rate": 4.545766719214579e-05, "loss": 0.6538053750991821, "step": 3703 }, { "epoch": 4.54478527607362, "grad_norm": 0.18890784680843353, "learning_rate": 4.5454781857820114e-05, "loss": 0.8247617483139038, "step": 3704 }, { "epoch": 4.54601226993865, "grad_norm": 0.2289976328611374, "learning_rate": 4.545189569902053e-05, "loss": 0.9172046184539795, "step": 3705 }, { "epoch": 4.5472392638036805, "grad_norm": 0.24558578431606293, "learning_rate": 4.544900871586336e-05, "loss": 0.7486671209335327, "step": 3706 }, { "epoch": 4.548466257668712, "grad_norm": 0.21590621769428253, "learning_rate": 4.544612090846496e-05, "loss": 0.7946643829345703, "step": 3707 }, { "epoch": 4.549693251533743, "grad_norm": 0.2137933075428009, "learning_rate": 4.544323227694175e-05, "loss": 0.7339245080947876, "step": 3708 }, { "epoch": 4.550920245398773, "grad_norm": 0.3025422990322113, "learning_rate": 4.5440342821410154e-05, "loss": 0.8747251033782959, "step": 3709 }, { "epoch": 4.552147239263804, "grad_norm": 0.22719065845012665, "learning_rate": 4.543745254198664e-05, "loss": 0.7873328328132629, "step": 3710 }, { "epoch": 4.553374233128834, "grad_norm": 0.20812241733074188, "learning_rate": 4.543456143878769e-05, "loss": 0.8522701859474182, "step": 3711 }, { "epoch": 4.554601226993865, "grad_norm": 0.21082338690757751, "learning_rate": 4.5431669511929863e-05, "loss": 0.9109253287315369, "step": 3712 }, { "epoch": 4.555828220858896, "grad_norm": 0.1995248645544052, "learning_rate": 4.54287767615297e-05, "loss": 0.8535338044166565, "step": 3713 }, { "epoch": 4.557055214723927, "grad_norm": 0.24010437726974487, "learning_rate": 4.5425883187703824e-05, "loss": 0.6010366678237915, "step": 3714 }, { "epoch": 4.558282208588957, "grad_norm": 0.19191791117191315, "learning_rate": 4.5422988790568846e-05, "loss": 0.8652875423431396, "step": 3715 }, { "epoch": 4.559509202453988, "grad_norm": 0.21488767862319946, "learning_rate": 4.542009357024143e-05, "loss": 0.7711038589477539, "step": 3716 }, { "epoch": 4.560736196319018, "grad_norm": 0.21290293335914612, "learning_rate": 4.541719752683829e-05, "loss": 0.7812492251396179, "step": 3717 }, { "epoch": 4.561963190184049, "grad_norm": 0.23136800527572632, "learning_rate": 4.541430066047615e-05, "loss": 0.7970465421676636, "step": 3718 }, { "epoch": 4.56319018404908, "grad_norm": 0.23191997408866882, "learning_rate": 4.5411402971271765e-05, "loss": 0.938908576965332, "step": 3719 }, { "epoch": 4.564417177914111, "grad_norm": 0.19633060693740845, "learning_rate": 4.540850445934195e-05, "loss": 0.9137487411499023, "step": 3720 }, { "epoch": 4.565644171779141, "grad_norm": 0.23059222102165222, "learning_rate": 4.5405605124803527e-05, "loss": 0.7526825666427612, "step": 3721 }, { "epoch": 4.566871165644172, "grad_norm": 0.1848897486925125, "learning_rate": 4.5402704967773356e-05, "loss": 0.8744078874588013, "step": 3722 }, { "epoch": 4.568098159509202, "grad_norm": 0.2316356748342514, "learning_rate": 4.539980398836835e-05, "loss": 0.7857775688171387, "step": 3723 }, { "epoch": 4.569325153374233, "grad_norm": 0.23939214646816254, "learning_rate": 4.539690218670541e-05, "loss": 0.7646298408508301, "step": 3724 }, { "epoch": 4.570552147239264, "grad_norm": 0.24572302401065826, "learning_rate": 4.539399956290152e-05, "loss": 0.8447686433792114, "step": 3725 }, { "epoch": 4.571779141104295, "grad_norm": 0.24864709377288818, "learning_rate": 4.5391096117073684e-05, "loss": 0.697338342666626, "step": 3726 }, { "epoch": 4.573006134969325, "grad_norm": 0.19908322393894196, "learning_rate": 4.538819184933891e-05, "loss": 0.9300003051757812, "step": 3727 }, { "epoch": 4.574233128834356, "grad_norm": 0.1883964240550995, "learning_rate": 4.5385286759814286e-05, "loss": 0.9682067036628723, "step": 3728 }, { "epoch": 4.575460122699386, "grad_norm": 0.26519691944122314, "learning_rate": 4.538238084861688e-05, "loss": 0.9072445631027222, "step": 3729 }, { "epoch": 4.576687116564417, "grad_norm": 0.269876629114151, "learning_rate": 4.537947411586384e-05, "loss": 0.7639573812484741, "step": 3730 }, { "epoch": 4.577914110429448, "grad_norm": 0.2698752284049988, "learning_rate": 4.537656656167232e-05, "loss": 0.7450636625289917, "step": 3731 }, { "epoch": 4.579141104294479, "grad_norm": 0.22230350971221924, "learning_rate": 4.537365818615952e-05, "loss": 0.7962805032730103, "step": 3732 }, { "epoch": 4.580368098159509, "grad_norm": 0.20938685536384583, "learning_rate": 4.537074898944266e-05, "loss": 0.8989408016204834, "step": 3733 }, { "epoch": 4.58159509202454, "grad_norm": 0.2219870686531067, "learning_rate": 4.536783897163901e-05, "loss": 0.7220679521560669, "step": 3734 }, { "epoch": 4.58282208588957, "grad_norm": 0.2147219032049179, "learning_rate": 4.536492813286586e-05, "loss": 1.0079034566879272, "step": 3735 }, { "epoch": 4.584049079754601, "grad_norm": 0.2420881986618042, "learning_rate": 4.536201647324054e-05, "loss": 0.804877519607544, "step": 3736 }, { "epoch": 4.585276073619632, "grad_norm": 0.2206210494041443, "learning_rate": 4.535910399288041e-05, "loss": 0.7718076705932617, "step": 3737 }, { "epoch": 4.586503067484663, "grad_norm": 0.255257785320282, "learning_rate": 4.535619069190288e-05, "loss": 0.826449453830719, "step": 3738 }, { "epoch": 4.587730061349693, "grad_norm": 0.3045845329761505, "learning_rate": 4.535327657042534e-05, "loss": 0.5943605303764343, "step": 3739 }, { "epoch": 4.588957055214724, "grad_norm": 0.26304134726524353, "learning_rate": 4.535036162856528e-05, "loss": 0.8701954483985901, "step": 3740 }, { "epoch": 4.590184049079754, "grad_norm": 0.231559157371521, "learning_rate": 4.534744586644019e-05, "loss": 0.797688364982605, "step": 3741 }, { "epoch": 4.591411042944785, "grad_norm": 0.21082347631454468, "learning_rate": 4.534452928416757e-05, "loss": 0.9175177812576294, "step": 3742 }, { "epoch": 4.592638036809816, "grad_norm": 0.23398450016975403, "learning_rate": 4.534161188186502e-05, "loss": 0.6849343776702881, "step": 3743 }, { "epoch": 4.593865030674847, "grad_norm": 0.2546641230583191, "learning_rate": 4.53386936596501e-05, "loss": 0.7446158528327942, "step": 3744 }, { "epoch": 4.595092024539877, "grad_norm": 0.23358526825904846, "learning_rate": 4.533577461764045e-05, "loss": 0.773057222366333, "step": 3745 }, { "epoch": 4.596319018404908, "grad_norm": 0.21819579601287842, "learning_rate": 4.5332854755953726e-05, "loss": 0.7337226867675781, "step": 3746 }, { "epoch": 4.597546012269938, "grad_norm": 0.24784332513809204, "learning_rate": 4.5329934074707616e-05, "loss": 0.6958850622177124, "step": 3747 }, { "epoch": 4.598773006134969, "grad_norm": 0.21279023587703705, "learning_rate": 4.532701257401984e-05, "loss": 0.7654991149902344, "step": 3748 }, { "epoch": 4.6, "grad_norm": 0.22381338477134705, "learning_rate": 4.532409025400817e-05, "loss": 0.7435764074325562, "step": 3749 }, { "epoch": 4.601226993865031, "grad_norm": 0.24018125236034393, "learning_rate": 4.5321167114790385e-05, "loss": 0.7412440776824951, "step": 3750 }, { "epoch": 4.602453987730061, "grad_norm": 0.27034711837768555, "learning_rate": 4.531824315648431e-05, "loss": 0.92814701795578, "step": 3751 }, { "epoch": 4.603680981595092, "grad_norm": 0.21870078146457672, "learning_rate": 4.5315318379207806e-05, "loss": 0.8559696674346924, "step": 3752 }, { "epoch": 4.604907975460122, "grad_norm": 0.23399312794208527, "learning_rate": 4.5312392783078756e-05, "loss": 0.8403365612030029, "step": 3753 }, { "epoch": 4.606134969325153, "grad_norm": 0.20934602618217468, "learning_rate": 4.5309466368215086e-05, "loss": 0.7424941658973694, "step": 3754 }, { "epoch": 4.6073619631901845, "grad_norm": 0.21295614540576935, "learning_rate": 4.530653913473475e-05, "loss": 0.7355087399482727, "step": 3755 }, { "epoch": 4.608588957055215, "grad_norm": 0.2629204988479614, "learning_rate": 4.5303611082755744e-05, "loss": 0.5458804368972778, "step": 3756 }, { "epoch": 4.609815950920245, "grad_norm": 0.2729099988937378, "learning_rate": 4.5300682212396075e-05, "loss": 0.896275520324707, "step": 3757 }, { "epoch": 4.611042944785276, "grad_norm": 0.2050817757844925, "learning_rate": 4.5297752523773815e-05, "loss": 0.9193210005760193, "step": 3758 }, { "epoch": 4.612269938650307, "grad_norm": 0.23762141168117523, "learning_rate": 4.529482201700703e-05, "loss": 0.6566680669784546, "step": 3759 }, { "epoch": 4.613496932515337, "grad_norm": 0.22524738311767578, "learning_rate": 4.5291890692213856e-05, "loss": 0.8941329717636108, "step": 3760 }, { "epoch": 4.614723926380368, "grad_norm": 0.2616147994995117, "learning_rate": 4.528895854951245e-05, "loss": 0.8482897281646729, "step": 3761 }, { "epoch": 4.615950920245399, "grad_norm": 0.20538467168807983, "learning_rate": 4.528602558902099e-05, "loss": 0.7355161905288696, "step": 3762 }, { "epoch": 4.61717791411043, "grad_norm": 0.1941530853509903, "learning_rate": 4.5283091810857683e-05, "loss": 0.7724388837814331, "step": 3763 }, { "epoch": 4.61840490797546, "grad_norm": 0.2218277007341385, "learning_rate": 4.528015721514081e-05, "loss": 0.7566249370574951, "step": 3764 }, { "epoch": 4.61963190184049, "grad_norm": 0.29373425245285034, "learning_rate": 4.527722180198863e-05, "loss": 0.7655233144760132, "step": 3765 }, { "epoch": 4.620858895705521, "grad_norm": 0.19584457576274872, "learning_rate": 4.527428557151947e-05, "loss": 0.8859816789627075, "step": 3766 }, { "epoch": 4.6220858895705526, "grad_norm": 0.2547360956668854, "learning_rate": 4.527134852385169e-05, "loss": 0.6359395384788513, "step": 3767 }, { "epoch": 4.623312883435583, "grad_norm": 0.21027487516403198, "learning_rate": 4.526841065910367e-05, "loss": 0.7979044914245605, "step": 3768 }, { "epoch": 4.624539877300613, "grad_norm": 0.18937073647975922, "learning_rate": 4.5265471977393814e-05, "loss": 0.9509891271591187, "step": 3769 }, { "epoch": 4.625766871165644, "grad_norm": 0.20646736025810242, "learning_rate": 4.526253247884058e-05, "loss": 0.7562378644943237, "step": 3770 }, { "epoch": 4.626993865030675, "grad_norm": 0.24118290841579437, "learning_rate": 4.5259592163562456e-05, "loss": 0.8090691566467285, "step": 3771 }, { "epoch": 4.6282208588957054, "grad_norm": 0.23009051382541656, "learning_rate": 4.5256651031677964e-05, "loss": 0.7341034412384033, "step": 3772 }, { "epoch": 4.629447852760737, "grad_norm": 0.2460617870092392, "learning_rate": 4.525370908330564e-05, "loss": 0.7801302671432495, "step": 3773 }, { "epoch": 4.630674846625767, "grad_norm": 0.24837683141231537, "learning_rate": 4.5250766318564054e-05, "loss": 0.7237023115158081, "step": 3774 }, { "epoch": 4.631901840490798, "grad_norm": 0.28174716234207153, "learning_rate": 4.5247822737571846e-05, "loss": 0.8314472436904907, "step": 3775 }, { "epoch": 4.633128834355828, "grad_norm": 0.2152920961380005, "learning_rate": 4.524487834044765e-05, "loss": 0.9028242230415344, "step": 3776 }, { "epoch": 4.634355828220859, "grad_norm": 0.2263692021369934, "learning_rate": 4.5241933127310145e-05, "loss": 0.7748797535896301, "step": 3777 }, { "epoch": 4.6355828220858895, "grad_norm": 0.20765165984630585, "learning_rate": 4.523898709827805e-05, "loss": 0.9995558261871338, "step": 3778 }, { "epoch": 4.636809815950921, "grad_norm": 0.19778303802013397, "learning_rate": 4.5236040253470105e-05, "loss": 0.8927989602088928, "step": 3779 }, { "epoch": 4.638036809815951, "grad_norm": 0.2159169316291809, "learning_rate": 4.52330925930051e-05, "loss": 0.7793435454368591, "step": 3780 }, { "epoch": 4.639263803680982, "grad_norm": 0.23679150640964508, "learning_rate": 4.523014411700184e-05, "loss": 0.792186975479126, "step": 3781 }, { "epoch": 4.640490797546012, "grad_norm": 0.245662659406662, "learning_rate": 4.522719482557916e-05, "loss": 0.6701518297195435, "step": 3782 }, { "epoch": 4.641717791411043, "grad_norm": 0.2206527590751648, "learning_rate": 4.5224244718855944e-05, "loss": 0.8186520338058472, "step": 3783 }, { "epoch": 4.6429447852760735, "grad_norm": 0.24629926681518555, "learning_rate": 4.5221293796951116e-05, "loss": 0.6555737257003784, "step": 3784 }, { "epoch": 4.644171779141105, "grad_norm": 0.21151427924633026, "learning_rate": 4.52183420599836e-05, "loss": 0.8309875726699829, "step": 3785 }, { "epoch": 4.645398773006135, "grad_norm": 0.24431027472019196, "learning_rate": 4.521538950807239e-05, "loss": 0.7234464883804321, "step": 3786 }, { "epoch": 4.646625766871166, "grad_norm": 0.2461848109960556, "learning_rate": 4.521243614133648e-05, "loss": 0.7783326506614685, "step": 3787 }, { "epoch": 4.647852760736196, "grad_norm": 0.24071557819843292, "learning_rate": 4.5209481959894906e-05, "loss": 0.6673211455345154, "step": 3788 }, { "epoch": 4.649079754601227, "grad_norm": 0.20594006776809692, "learning_rate": 4.520652696386677e-05, "loss": 0.8330893516540527, "step": 3789 }, { "epoch": 4.6503067484662575, "grad_norm": 0.2595725953578949, "learning_rate": 4.520357115337115e-05, "loss": 0.6951373815536499, "step": 3790 }, { "epoch": 4.651533742331289, "grad_norm": 0.19828973710536957, "learning_rate": 4.520061452852721e-05, "loss": 0.7979562282562256, "step": 3791 }, { "epoch": 4.652760736196319, "grad_norm": 0.2392512410879135, "learning_rate": 4.5197657089454104e-05, "loss": 0.8587701916694641, "step": 3792 }, { "epoch": 4.65398773006135, "grad_norm": 0.24893534183502197, "learning_rate": 4.519469883627105e-05, "loss": 0.794816792011261, "step": 3793 }, { "epoch": 4.65521472392638, "grad_norm": 0.18328498303890228, "learning_rate": 4.519173976909728e-05, "loss": 0.8233252763748169, "step": 3794 }, { "epoch": 4.656441717791411, "grad_norm": 0.25531572103500366, "learning_rate": 4.518877988805207e-05, "loss": 0.7534337639808655, "step": 3795 }, { "epoch": 4.6576687116564415, "grad_norm": 0.20976418256759644, "learning_rate": 4.5185819193254716e-05, "loss": 0.7850933074951172, "step": 3796 }, { "epoch": 4.658895705521473, "grad_norm": 0.24935020506381989, "learning_rate": 4.518285768482457e-05, "loss": 0.6685336232185364, "step": 3797 }, { "epoch": 4.660122699386503, "grad_norm": 0.21977970004081726, "learning_rate": 4.5179895362881e-05, "loss": 0.8416726589202881, "step": 3798 }, { "epoch": 4.661349693251534, "grad_norm": 0.22405552864074707, "learning_rate": 4.51769322275434e-05, "loss": 0.829549252986908, "step": 3799 }, { "epoch": 4.662576687116564, "grad_norm": 0.2370280921459198, "learning_rate": 4.51739682789312e-05, "loss": 0.7692720293998718, "step": 3800 }, { "epoch": 4.663803680981595, "grad_norm": 0.21600645780563354, "learning_rate": 4.517100351716388e-05, "loss": 0.9649308323860168, "step": 3801 }, { "epoch": 4.6650306748466255, "grad_norm": 0.23359772562980652, "learning_rate": 4.516803794236094e-05, "loss": 0.7239924669265747, "step": 3802 }, { "epoch": 4.666257668711657, "grad_norm": 0.2609195113182068, "learning_rate": 4.516507155464191e-05, "loss": 0.9220952987670898, "step": 3803 }, { "epoch": 4.667484662576687, "grad_norm": 0.22379694879055023, "learning_rate": 4.5162104354126366e-05, "loss": 0.7693256735801697, "step": 3804 }, { "epoch": 4.668711656441718, "grad_norm": 0.2213345319032669, "learning_rate": 4.5159136340933896e-05, "loss": 0.8533531427383423, "step": 3805 }, { "epoch": 4.669938650306748, "grad_norm": 0.2161743938922882, "learning_rate": 4.5156167515184124e-05, "loss": 0.8011225461959839, "step": 3806 }, { "epoch": 4.671165644171779, "grad_norm": 0.26342761516571045, "learning_rate": 4.5153197876996736e-05, "loss": 0.8334090113639832, "step": 3807 }, { "epoch": 4.6723926380368095, "grad_norm": 0.2217378169298172, "learning_rate": 4.5150227426491423e-05, "loss": 0.7739074230194092, "step": 3808 }, { "epoch": 4.673619631901841, "grad_norm": 0.22475285828113556, "learning_rate": 4.51472561637879e-05, "loss": 1.022510290145874, "step": 3809 }, { "epoch": 4.674846625766871, "grad_norm": 0.2459096610546112, "learning_rate": 4.514428408900596e-05, "loss": 0.8251208066940308, "step": 3810 }, { "epoch": 4.676073619631902, "grad_norm": 0.19056293368339539, "learning_rate": 4.514131120226537e-05, "loss": 0.9021421670913696, "step": 3811 }, { "epoch": 4.677300613496932, "grad_norm": 0.23119619488716125, "learning_rate": 4.5138337503685976e-05, "loss": 0.785692572593689, "step": 3812 }, { "epoch": 4.678527607361963, "grad_norm": 0.2046533226966858, "learning_rate": 4.5135362993387634e-05, "loss": 0.7816044092178345, "step": 3813 }, { "epoch": 4.6797546012269935, "grad_norm": 0.23199963569641113, "learning_rate": 4.513238767149023e-05, "loss": 0.8896338939666748, "step": 3814 }, { "epoch": 4.680981595092025, "grad_norm": 0.24133539199829102, "learning_rate": 4.51294115381137e-05, "loss": 0.6950920224189758, "step": 3815 }, { "epoch": 4.682208588957055, "grad_norm": 0.23071449995040894, "learning_rate": 4.512643459337801e-05, "loss": 0.7343400120735168, "step": 3816 }, { "epoch": 4.683435582822086, "grad_norm": 0.2212027907371521, "learning_rate": 4.5123456837403134e-05, "loss": 0.9091899991035461, "step": 3817 }, { "epoch": 4.684662576687117, "grad_norm": 0.23381660878658295, "learning_rate": 4.5120478270309114e-05, "loss": 0.773690402507782, "step": 3818 }, { "epoch": 4.685889570552147, "grad_norm": 0.25799909234046936, "learning_rate": 4.5117498892216e-05, "loss": 0.6494761109352112, "step": 3819 }, { "epoch": 4.6871165644171775, "grad_norm": 0.22891709208488464, "learning_rate": 4.511451870324388e-05, "loss": 0.7743333578109741, "step": 3820 }, { "epoch": 4.688343558282209, "grad_norm": 0.2047889232635498, "learning_rate": 4.511153770351288e-05, "loss": 0.7936921119689941, "step": 3821 }, { "epoch": 4.68957055214724, "grad_norm": 0.20989416539669037, "learning_rate": 4.510855589314316e-05, "loss": 0.7602733373641968, "step": 3822 }, { "epoch": 4.69079754601227, "grad_norm": 0.1692121922969818, "learning_rate": 4.510557327225489e-05, "loss": 0.8907099962234497, "step": 3823 }, { "epoch": 4.6920245398773, "grad_norm": 0.22126203775405884, "learning_rate": 4.5102589840968314e-05, "loss": 0.7993103265762329, "step": 3824 }, { "epoch": 4.693251533742331, "grad_norm": 0.2654212415218353, "learning_rate": 4.509960559940367e-05, "loss": 0.8015289306640625, "step": 3825 }, { "epoch": 4.694478527607362, "grad_norm": 0.2446656972169876, "learning_rate": 4.509662054768126e-05, "loss": 0.7128347754478455, "step": 3826 }, { "epoch": 4.695705521472393, "grad_norm": 0.25069552659988403, "learning_rate": 4.509363468592139e-05, "loss": 0.8209255933761597, "step": 3827 }, { "epoch": 4.696932515337423, "grad_norm": 0.16958525776863098, "learning_rate": 4.509064801424441e-05, "loss": 0.9117641448974609, "step": 3828 }, { "epoch": 4.698159509202454, "grad_norm": 0.20045584440231323, "learning_rate": 4.5087660532770716e-05, "loss": 0.695158064365387, "step": 3829 }, { "epoch": 4.699386503067485, "grad_norm": 0.29242438077926636, "learning_rate": 4.5084672241620704e-05, "loss": 0.6074084043502808, "step": 3830 }, { "epoch": 4.700613496932515, "grad_norm": 0.2018248736858368, "learning_rate": 4.5081683140914847e-05, "loss": 0.8513878583908081, "step": 3831 }, { "epoch": 4.7018404907975455, "grad_norm": 0.25362181663513184, "learning_rate": 4.507869323077362e-05, "loss": 0.7892842292785645, "step": 3832 }, { "epoch": 4.703067484662577, "grad_norm": 0.21227425336837769, "learning_rate": 4.507570251131753e-05, "loss": 0.8389856815338135, "step": 3833 }, { "epoch": 4.704294478527608, "grad_norm": 0.22975920140743256, "learning_rate": 4.5072710982667134e-05, "loss": 0.7200295329093933, "step": 3834 }, { "epoch": 4.705521472392638, "grad_norm": 0.2029040902853012, "learning_rate": 4.5069718644943004e-05, "loss": 0.8092114925384521, "step": 3835 }, { "epoch": 4.706748466257669, "grad_norm": 0.29723110795021057, "learning_rate": 4.506672549826576e-05, "loss": 0.6141409873962402, "step": 3836 }, { "epoch": 4.707975460122699, "grad_norm": 0.25473085045814514, "learning_rate": 4.506373154275604e-05, "loss": 0.9090679883956909, "step": 3837 }, { "epoch": 4.70920245398773, "grad_norm": 0.24695849418640137, "learning_rate": 4.5060736778534517e-05, "loss": 0.7700980305671692, "step": 3838 }, { "epoch": 4.710429447852761, "grad_norm": 0.1997612863779068, "learning_rate": 4.505774120572193e-05, "loss": 0.7830257415771484, "step": 3839 }, { "epoch": 4.711656441717792, "grad_norm": 0.19099284708499908, "learning_rate": 4.5054744824438975e-05, "loss": 0.8298526406288147, "step": 3840 }, { "epoch": 4.712883435582822, "grad_norm": 0.18917524814605713, "learning_rate": 4.5051747634806474e-05, "loss": 0.9038972854614258, "step": 3841 }, { "epoch": 4.714110429447853, "grad_norm": 0.23021087050437927, "learning_rate": 4.504874963694521e-05, "loss": 0.7987234592437744, "step": 3842 }, { "epoch": 4.715337423312883, "grad_norm": 0.2428201586008072, "learning_rate": 4.504575083097603e-05, "loss": 0.8242741823196411, "step": 3843 }, { "epoch": 4.716564417177914, "grad_norm": 0.2348727434873581, "learning_rate": 4.50427512170198e-05, "loss": 0.8605139255523682, "step": 3844 }, { "epoch": 4.717791411042945, "grad_norm": 0.2992912828922272, "learning_rate": 4.5039750795197436e-05, "loss": 0.5682127475738525, "step": 3845 }, { "epoch": 4.719018404907976, "grad_norm": 0.19726762175559998, "learning_rate": 4.5036749565629875e-05, "loss": 0.8308160305023193, "step": 3846 }, { "epoch": 4.720245398773006, "grad_norm": 0.20110544562339783, "learning_rate": 4.503374752843808e-05, "loss": 0.8550522327423096, "step": 3847 }, { "epoch": 4.721472392638037, "grad_norm": 0.19946575164794922, "learning_rate": 4.503074468374306e-05, "loss": 0.8932974338531494, "step": 3848 }, { "epoch": 4.722699386503067, "grad_norm": 0.23333285748958588, "learning_rate": 4.502774103166586e-05, "loss": 0.7687602043151855, "step": 3849 }, { "epoch": 4.723926380368098, "grad_norm": 0.21988211572170258, "learning_rate": 4.502473657232754e-05, "loss": 0.7526895999908447, "step": 3850 }, { "epoch": 4.725153374233129, "grad_norm": 0.18914781510829926, "learning_rate": 4.502173130584919e-05, "loss": 0.7793097496032715, "step": 3851 }, { "epoch": 4.72638036809816, "grad_norm": 0.25929510593414307, "learning_rate": 4.5018725232351964e-05, "loss": 0.6801799535751343, "step": 3852 }, { "epoch": 4.72760736196319, "grad_norm": 0.20963574945926666, "learning_rate": 4.5015718351957015e-05, "loss": 0.742642879486084, "step": 3853 }, { "epoch": 4.728834355828221, "grad_norm": 0.24343150854110718, "learning_rate": 4.501271066478555e-05, "loss": 0.7656204700469971, "step": 3854 }, { "epoch": 4.730061349693251, "grad_norm": 0.24895413219928741, "learning_rate": 4.500970217095879e-05, "loss": 0.8474798202514648, "step": 3855 }, { "epoch": 4.731288343558282, "grad_norm": 0.2284584492444992, "learning_rate": 4.500669287059801e-05, "loss": 0.8783659338951111, "step": 3856 }, { "epoch": 4.732515337423313, "grad_norm": 0.25009283423423767, "learning_rate": 4.50036827638245e-05, "loss": 0.6785919070243835, "step": 3857 }, { "epoch": 4.733742331288344, "grad_norm": 0.29439786076545715, "learning_rate": 4.500067185075959e-05, "loss": 0.6781637668609619, "step": 3858 }, { "epoch": 4.734969325153374, "grad_norm": 0.21282193064689636, "learning_rate": 4.4997660131524635e-05, "loss": 0.8905094861984253, "step": 3859 }, { "epoch": 4.736196319018405, "grad_norm": 0.2928798794746399, "learning_rate": 4.499464760624105e-05, "loss": 0.7122421264648438, "step": 3860 }, { "epoch": 4.737423312883435, "grad_norm": 0.2506873309612274, "learning_rate": 4.4991634275030234e-05, "loss": 0.8298746347427368, "step": 3861 }, { "epoch": 4.738650306748466, "grad_norm": 0.22487477958202362, "learning_rate": 4.498862013801366e-05, "loss": 0.9339353442192078, "step": 3862 }, { "epoch": 4.739877300613497, "grad_norm": 0.22642339766025543, "learning_rate": 4.498560519531282e-05, "loss": 0.7983461618423462, "step": 3863 }, { "epoch": 4.741104294478528, "grad_norm": 0.2328578382730484, "learning_rate": 4.498258944704923e-05, "loss": 0.7131355404853821, "step": 3864 }, { "epoch": 4.742331288343558, "grad_norm": 0.19926932454109192, "learning_rate": 4.4979572893344456e-05, "loss": 0.8765395879745483, "step": 3865 }, { "epoch": 4.743558282208589, "grad_norm": 0.21012350916862488, "learning_rate": 4.497655553432009e-05, "loss": 0.8444768786430359, "step": 3866 }, { "epoch": 4.744785276073619, "grad_norm": 0.19414891302585602, "learning_rate": 4.497353737009774e-05, "loss": 0.7707676887512207, "step": 3867 }, { "epoch": 4.74601226993865, "grad_norm": 0.2217346876859665, "learning_rate": 4.497051840079906e-05, "loss": 0.8257302045822144, "step": 3868 }, { "epoch": 4.747239263803681, "grad_norm": 0.2121640294790268, "learning_rate": 4.496749862654574e-05, "loss": 0.9406198263168335, "step": 3869 }, { "epoch": 4.748466257668712, "grad_norm": 0.2530861496925354, "learning_rate": 4.49644780474595e-05, "loss": 0.751103401184082, "step": 3870 }, { "epoch": 4.749693251533742, "grad_norm": 0.2012452334165573, "learning_rate": 4.496145666366209e-05, "loss": 0.7346529960632324, "step": 3871 }, { "epoch": 4.750920245398773, "grad_norm": 0.20811261236667633, "learning_rate": 4.49584344752753e-05, "loss": 0.8004648685455322, "step": 3872 }, { "epoch": 4.752147239263803, "grad_norm": 0.31461766362190247, "learning_rate": 4.495541148242094e-05, "loss": 0.5943830609321594, "step": 3873 }, { "epoch": 4.7533742331288344, "grad_norm": 0.2386905997991562, "learning_rate": 4.495238768522085e-05, "loss": 0.8598381280899048, "step": 3874 }, { "epoch": 4.754601226993865, "grad_norm": 0.2434053122997284, "learning_rate": 4.494936308379693e-05, "loss": 0.7233115434646606, "step": 3875 }, { "epoch": 4.755828220858896, "grad_norm": 0.24087561666965485, "learning_rate": 4.494633767827107e-05, "loss": 0.7490500211715698, "step": 3876 }, { "epoch": 4.757055214723926, "grad_norm": 0.22140012681484222, "learning_rate": 4.4943311468765235e-05, "loss": 0.7948988080024719, "step": 3877 }, { "epoch": 4.758282208588957, "grad_norm": 0.20794588327407837, "learning_rate": 4.494028445540139e-05, "loss": 0.8697289228439331, "step": 3878 }, { "epoch": 4.759509202453987, "grad_norm": 0.25230205059051514, "learning_rate": 4.493725663830155e-05, "loss": 0.8006312251091003, "step": 3879 }, { "epoch": 4.7607361963190185, "grad_norm": 0.22391480207443237, "learning_rate": 4.493422801758776e-05, "loss": 0.7767553329467773, "step": 3880 }, { "epoch": 4.76196319018405, "grad_norm": 0.2405032515525818, "learning_rate": 4.4931198593382096e-05, "loss": 0.7986427545547485, "step": 3881 }, { "epoch": 4.76319018404908, "grad_norm": 0.26738765835762024, "learning_rate": 4.4928168365806656e-05, "loss": 0.5594518184661865, "step": 3882 }, { "epoch": 4.76441717791411, "grad_norm": 0.24367396533489227, "learning_rate": 4.492513733498358e-05, "loss": 0.7261731028556824, "step": 3883 }, { "epoch": 4.765644171779141, "grad_norm": 0.2546311318874359, "learning_rate": 4.492210550103506e-05, "loss": 0.8601680994033813, "step": 3884 }, { "epoch": 4.766871165644172, "grad_norm": 0.20384334027767181, "learning_rate": 4.491907286408328e-05, "loss": 0.772682785987854, "step": 3885 }, { "epoch": 4.7680981595092025, "grad_norm": 0.22727617621421814, "learning_rate": 4.491603942425049e-05, "loss": 0.7691712975502014, "step": 3886 }, { "epoch": 4.769325153374233, "grad_norm": 0.1790500432252884, "learning_rate": 4.4913005181658944e-05, "loss": 0.9926009178161621, "step": 3887 }, { "epoch": 4.770552147239264, "grad_norm": 0.2377728372812271, "learning_rate": 4.490997013643096e-05, "loss": 0.6329394578933716, "step": 3888 }, { "epoch": 4.771779141104295, "grad_norm": 0.2573145031929016, "learning_rate": 4.490693428868886e-05, "loss": 0.8721634149551392, "step": 3889 }, { "epoch": 4.773006134969325, "grad_norm": 0.27957063913345337, "learning_rate": 4.490389763855501e-05, "loss": 0.6817169189453125, "step": 3890 }, { "epoch": 4.774233128834355, "grad_norm": 0.20855136215686798, "learning_rate": 4.490086018615183e-05, "loss": 0.8563522100448608, "step": 3891 }, { "epoch": 4.7754601226993865, "grad_norm": 0.23572897911071777, "learning_rate": 4.4897821931601726e-05, "loss": 0.7996118068695068, "step": 3892 }, { "epoch": 4.776687116564418, "grad_norm": 0.2269423007965088, "learning_rate": 4.489478287502716e-05, "loss": 0.895775556564331, "step": 3893 }, { "epoch": 4.777914110429448, "grad_norm": 0.20658260583877563, "learning_rate": 4.489174301655066e-05, "loss": 0.771888256072998, "step": 3894 }, { "epoch": 4.779141104294479, "grad_norm": 0.23470480740070343, "learning_rate": 4.4888702356294714e-05, "loss": 0.6777859926223755, "step": 3895 }, { "epoch": 4.780368098159509, "grad_norm": 0.258799284696579, "learning_rate": 4.4885660894381906e-05, "loss": 0.6849362850189209, "step": 3896 }, { "epoch": 4.78159509202454, "grad_norm": 0.23845058679580688, "learning_rate": 4.4882618630934824e-05, "loss": 0.6551795601844788, "step": 3897 }, { "epoch": 4.7828220858895705, "grad_norm": 0.20098745822906494, "learning_rate": 4.487957556607609e-05, "loss": 0.7940206527709961, "step": 3898 }, { "epoch": 4.784049079754602, "grad_norm": 0.2461889386177063, "learning_rate": 4.487653169992837e-05, "loss": 0.8171834945678711, "step": 3899 }, { "epoch": 4.785276073619632, "grad_norm": 0.22946862876415253, "learning_rate": 4.4873487032614334e-05, "loss": 0.7379295825958252, "step": 3900 }, { "epoch": 4.786503067484663, "grad_norm": 0.2541807293891907, "learning_rate": 4.4870441564256726e-05, "loss": 0.7597071528434753, "step": 3901 }, { "epoch": 4.787730061349693, "grad_norm": 0.2737743556499481, "learning_rate": 4.486739529497829e-05, "loss": 0.6331025958061218, "step": 3902 }, { "epoch": 4.788957055214724, "grad_norm": 0.2232941836118698, "learning_rate": 4.486434822490181e-05, "loss": 0.8091917634010315, "step": 3903 }, { "epoch": 4.7901840490797545, "grad_norm": 0.3094351887702942, "learning_rate": 4.486130035415012e-05, "loss": 0.6142491698265076, "step": 3904 }, { "epoch": 4.791411042944786, "grad_norm": 0.23950225114822388, "learning_rate": 4.485825168284604e-05, "loss": 0.8343156576156616, "step": 3905 }, { "epoch": 4.792638036809816, "grad_norm": 0.2322491705417633, "learning_rate": 4.485520221111248e-05, "loss": 0.742708683013916, "step": 3906 }, { "epoch": 4.793865030674847, "grad_norm": 0.25424128770828247, "learning_rate": 4.485215193907235e-05, "loss": 0.8171655535697937, "step": 3907 }, { "epoch": 4.795092024539877, "grad_norm": 0.18427357077598572, "learning_rate": 4.4849100866848596e-05, "loss": 0.8263919353485107, "step": 3908 }, { "epoch": 4.796319018404908, "grad_norm": 0.2161429077386856, "learning_rate": 4.48460489945642e-05, "loss": 0.7524228692054749, "step": 3909 }, { "epoch": 4.7975460122699385, "grad_norm": 0.24072276055812836, "learning_rate": 4.484299632234217e-05, "loss": 0.7611297965049744, "step": 3910 }, { "epoch": 4.79877300613497, "grad_norm": 0.22485730051994324, "learning_rate": 4.483994285030555e-05, "loss": 0.9021326303482056, "step": 3911 }, { "epoch": 4.8, "grad_norm": 0.21989348530769348, "learning_rate": 4.483688857857742e-05, "loss": 0.7834089994430542, "step": 3912 }, { "epoch": 4.801226993865031, "grad_norm": 0.2522912919521332, "learning_rate": 4.4833833507280884e-05, "loss": 0.6755481362342834, "step": 3913 }, { "epoch": 4.802453987730061, "grad_norm": 0.2734640836715698, "learning_rate": 4.483077763653909e-05, "loss": 0.6812394857406616, "step": 3914 }, { "epoch": 4.803680981595092, "grad_norm": 0.3254409432411194, "learning_rate": 4.4827720966475204e-05, "loss": 0.7954015731811523, "step": 3915 }, { "epoch": 4.8049079754601225, "grad_norm": 0.25065505504608154, "learning_rate": 4.482466349721244e-05, "loss": 0.6359091997146606, "step": 3916 }, { "epoch": 4.806134969325154, "grad_norm": 0.2631398141384125, "learning_rate": 4.482160522887403e-05, "loss": 0.8678405284881592, "step": 3917 }, { "epoch": 4.807361963190184, "grad_norm": 0.2209032028913498, "learning_rate": 4.4818546161583255e-05, "loss": 0.805376410484314, "step": 3918 }, { "epoch": 4.808588957055215, "grad_norm": 0.25204721093177795, "learning_rate": 4.481548629546341e-05, "loss": 0.7512853145599365, "step": 3919 }, { "epoch": 4.809815950920245, "grad_norm": 0.22698140144348145, "learning_rate": 4.481242563063781e-05, "loss": 0.9189560413360596, "step": 3920 }, { "epoch": 4.811042944785276, "grad_norm": 0.27674275636672974, "learning_rate": 4.480936416722986e-05, "loss": 0.7421541810035706, "step": 3921 }, { "epoch": 4.8122699386503065, "grad_norm": 0.22683903574943542, "learning_rate": 4.4806301905362924e-05, "loss": 0.6695331335067749, "step": 3922 }, { "epoch": 4.813496932515338, "grad_norm": 0.27755871415138245, "learning_rate": 4.480323884516046e-05, "loss": 0.6926881670951843, "step": 3923 }, { "epoch": 4.814723926380368, "grad_norm": 0.2559722065925598, "learning_rate": 4.48001749867459e-05, "loss": 0.7483288049697876, "step": 3924 }, { "epoch": 4.815950920245399, "grad_norm": 0.2409314662218094, "learning_rate": 4.479711033024278e-05, "loss": 0.9220545887947083, "step": 3925 }, { "epoch": 4.817177914110429, "grad_norm": 0.2526824474334717, "learning_rate": 4.479404487577459e-05, "loss": 0.8635417222976685, "step": 3926 }, { "epoch": 4.81840490797546, "grad_norm": 0.24401167035102844, "learning_rate": 4.479097862346491e-05, "loss": 0.5769739747047424, "step": 3927 }, { "epoch": 4.8196319018404905, "grad_norm": 0.2103557139635086, "learning_rate": 4.478791157343732e-05, "loss": 0.8478500843048096, "step": 3928 }, { "epoch": 4.820858895705522, "grad_norm": 0.2000693529844284, "learning_rate": 4.478484372581546e-05, "loss": 0.7635329365730286, "step": 3929 }, { "epoch": 4.822085889570552, "grad_norm": 0.26822200417518616, "learning_rate": 4.478177508072298e-05, "loss": 0.6815691590309143, "step": 3930 }, { "epoch": 4.823312883435583, "grad_norm": 0.2861550748348236, "learning_rate": 4.477870563828356e-05, "loss": 0.6857540607452393, "step": 3931 }, { "epoch": 4.824539877300613, "grad_norm": 0.22112642228603363, "learning_rate": 4.477563539862093e-05, "loss": 0.7160782814025879, "step": 3932 }, { "epoch": 4.825766871165644, "grad_norm": 0.2570270001888275, "learning_rate": 4.4772564361858845e-05, "loss": 0.7350577116012573, "step": 3933 }, { "epoch": 4.8269938650306745, "grad_norm": 0.2151852250099182, "learning_rate": 4.476949252812107e-05, "loss": 0.8387589454650879, "step": 3934 }, { "epoch": 4.828220858895706, "grad_norm": 0.22800597548484802, "learning_rate": 4.476641989753144e-05, "loss": 0.701210618019104, "step": 3935 }, { "epoch": 4.829447852760736, "grad_norm": 0.26067566871643066, "learning_rate": 4.4763346470213805e-05, "loss": 0.7524499893188477, "step": 3936 }, { "epoch": 4.830674846625767, "grad_norm": 0.24606946110725403, "learning_rate": 4.476027224629204e-05, "loss": 0.846677303314209, "step": 3937 }, { "epoch": 4.831901840490797, "grad_norm": 0.27336469292640686, "learning_rate": 4.475719722589006e-05, "loss": 0.5456121563911438, "step": 3938 }, { "epoch": 4.833128834355828, "grad_norm": 0.22795765101909637, "learning_rate": 4.47541214091318e-05, "loss": 0.8226833939552307, "step": 3939 }, { "epoch": 4.8343558282208585, "grad_norm": 0.24963755905628204, "learning_rate": 4.475104479614125e-05, "loss": 0.6934236288070679, "step": 3940 }, { "epoch": 4.83558282208589, "grad_norm": 0.23631978034973145, "learning_rate": 4.4747967387042424e-05, "loss": 0.7647913694381714, "step": 3941 }, { "epoch": 4.83680981595092, "grad_norm": 0.3200684189796448, "learning_rate": 4.474488918195934e-05, "loss": 0.5518380403518677, "step": 3942 }, { "epoch": 4.838036809815951, "grad_norm": 0.24788416922092438, "learning_rate": 4.4741810181016104e-05, "loss": 0.7258909940719604, "step": 3943 }, { "epoch": 4.839263803680982, "grad_norm": 0.23542679846286774, "learning_rate": 4.4738730384336796e-05, "loss": 0.8164786100387573, "step": 3944 }, { "epoch": 4.840490797546012, "grad_norm": 0.23519344627857208, "learning_rate": 4.473564979204557e-05, "loss": 0.6943330764770508, "step": 3945 }, { "epoch": 4.8417177914110425, "grad_norm": 0.224295511841774, "learning_rate": 4.473256840426657e-05, "loss": 0.7725149989128113, "step": 3946 }, { "epoch": 4.842944785276074, "grad_norm": 0.21401910483837128, "learning_rate": 4.472948622112404e-05, "loss": 0.7753803730010986, "step": 3947 }, { "epoch": 4.844171779141105, "grad_norm": 0.2347026765346527, "learning_rate": 4.4726403242742174e-05, "loss": 0.6330918073654175, "step": 3948 }, { "epoch": 4.845398773006135, "grad_norm": 0.20128946006298065, "learning_rate": 4.472331946924526e-05, "loss": 0.966576099395752, "step": 3949 }, { "epoch": 4.846625766871165, "grad_norm": 0.30607739090919495, "learning_rate": 4.4720234900757596e-05, "loss": 0.6593180298805237, "step": 3950 }, { "epoch": 4.847852760736196, "grad_norm": 0.20309150218963623, "learning_rate": 4.47171495374035e-05, "loss": 0.8675559163093567, "step": 3951 }, { "epoch": 4.849079754601227, "grad_norm": 0.19279471039772034, "learning_rate": 4.471406337930735e-05, "loss": 0.6736041307449341, "step": 3952 }, { "epoch": 4.850306748466258, "grad_norm": 0.5202996730804443, "learning_rate": 4.471097642659352e-05, "loss": 0.860058069229126, "step": 3953 }, { "epoch": 4.851533742331288, "grad_norm": 0.21976573765277863, "learning_rate": 4.470788867938646e-05, "loss": 0.8226426839828491, "step": 3954 }, { "epoch": 4.852760736196319, "grad_norm": 0.23420412838459015, "learning_rate": 4.4704800137810606e-05, "loss": 0.5881204009056091, "step": 3955 }, { "epoch": 4.85398773006135, "grad_norm": 0.2729156017303467, "learning_rate": 4.470171080199046e-05, "loss": 0.8055133819580078, "step": 3956 }, { "epoch": 4.85521472392638, "grad_norm": 0.2284385710954666, "learning_rate": 4.469862067205055e-05, "loss": 0.7239580154418945, "step": 3957 }, { "epoch": 4.856441717791411, "grad_norm": 0.2275695502758026, "learning_rate": 4.4695529748115415e-05, "loss": 0.8850164413452148, "step": 3958 }, { "epoch": 4.857668711656442, "grad_norm": 0.26267942786216736, "learning_rate": 4.469243803030965e-05, "loss": 0.7290022373199463, "step": 3959 }, { "epoch": 4.858895705521473, "grad_norm": 0.2047361582517624, "learning_rate": 4.468934551875788e-05, "loss": 0.95073401927948, "step": 3960 }, { "epoch": 4.860122699386503, "grad_norm": 0.21685822308063507, "learning_rate": 4.468625221358473e-05, "loss": 0.8572036027908325, "step": 3961 }, { "epoch": 4.861349693251534, "grad_norm": 0.24343201518058777, "learning_rate": 4.468315811491492e-05, "loss": 0.7342446446418762, "step": 3962 }, { "epoch": 4.862576687116564, "grad_norm": 0.21692447364330292, "learning_rate": 4.468006322287314e-05, "loss": 0.8048394918441772, "step": 3963 }, { "epoch": 4.863803680981595, "grad_norm": 0.2829541265964508, "learning_rate": 4.467696753758413e-05, "loss": 0.6864960193634033, "step": 3964 }, { "epoch": 4.865030674846626, "grad_norm": 0.2458750605583191, "learning_rate": 4.467387105917269e-05, "loss": 0.7179615497589111, "step": 3965 }, { "epoch": 4.866257668711657, "grad_norm": 0.22984161972999573, "learning_rate": 4.467077378776362e-05, "loss": 0.6451505422592163, "step": 3966 }, { "epoch": 4.867484662576687, "grad_norm": 0.25746217370033264, "learning_rate": 4.466767572348176e-05, "loss": 0.7005447149276733, "step": 3967 }, { "epoch": 4.868711656441718, "grad_norm": 0.19074629247188568, "learning_rate": 4.466457686645198e-05, "loss": 0.7977602481842041, "step": 3968 }, { "epoch": 4.869938650306748, "grad_norm": 0.3155001103878021, "learning_rate": 4.4661477216799184e-05, "loss": 0.7697421312332153, "step": 3969 }, { "epoch": 4.871165644171779, "grad_norm": 0.22381901741027832, "learning_rate": 4.465837677464834e-05, "loss": 0.8560417890548706, "step": 3970 }, { "epoch": 4.87239263803681, "grad_norm": 0.28377270698547363, "learning_rate": 4.465527554012437e-05, "loss": 0.7285218238830566, "step": 3971 }, { "epoch": 4.873619631901841, "grad_norm": 0.2682044506072998, "learning_rate": 4.4652173513352326e-05, "loss": 0.8760219812393188, "step": 3972 }, { "epoch": 4.874846625766871, "grad_norm": 0.24972261488437653, "learning_rate": 4.4649070694457204e-05, "loss": 0.6893145442008972, "step": 3973 }, { "epoch": 4.876073619631902, "grad_norm": 0.31637442111968994, "learning_rate": 4.464596708356409e-05, "loss": 0.6643929481506348, "step": 3974 }, { "epoch": 4.877300613496932, "grad_norm": 0.24928277730941772, "learning_rate": 4.464286268079807e-05, "loss": 0.6254291534423828, "step": 3975 }, { "epoch": 4.8785276073619634, "grad_norm": 0.22733661532402039, "learning_rate": 4.463975748628428e-05, "loss": 0.7349869012832642, "step": 3976 }, { "epoch": 4.879754601226994, "grad_norm": 0.2701343595981598, "learning_rate": 4.4636651500147875e-05, "loss": 0.7180701494216919, "step": 3977 }, { "epoch": 4.880981595092025, "grad_norm": 0.29179731011390686, "learning_rate": 4.463354472251406e-05, "loss": 0.6848684549331665, "step": 3978 }, { "epoch": 4.882208588957055, "grad_norm": 0.2239917367696762, "learning_rate": 4.463043715350805e-05, "loss": 0.740951657295227, "step": 3979 }, { "epoch": 4.883435582822086, "grad_norm": 0.1977325826883316, "learning_rate": 4.4627328793255105e-05, "loss": 0.8740339279174805, "step": 3980 }, { "epoch": 4.884662576687116, "grad_norm": 0.26389753818511963, "learning_rate": 4.462421964188052e-05, "loss": 0.7317819595336914, "step": 3981 }, { "epoch": 4.8858895705521475, "grad_norm": 0.25379568338394165, "learning_rate": 4.462110969950961e-05, "loss": 0.6838897466659546, "step": 3982 }, { "epoch": 4.887116564417178, "grad_norm": 0.2799469828605652, "learning_rate": 4.4617998966267735e-05, "loss": 0.7395932674407959, "step": 3983 }, { "epoch": 4.888343558282209, "grad_norm": 0.21488185226917267, "learning_rate": 4.461488744228026e-05, "loss": 0.7043013572692871, "step": 3984 }, { "epoch": 4.889570552147239, "grad_norm": 0.2289891093969345, "learning_rate": 4.461177512767263e-05, "loss": 0.7837849855422974, "step": 3985 }, { "epoch": 4.89079754601227, "grad_norm": 0.29293492436408997, "learning_rate": 4.460866202257027e-05, "loss": 0.7945666313171387, "step": 3986 }, { "epoch": 4.8920245398773, "grad_norm": 0.23999105393886566, "learning_rate": 4.460554812709867e-05, "loss": 0.7333303689956665, "step": 3987 }, { "epoch": 4.8932515337423315, "grad_norm": 0.24409271776676178, "learning_rate": 4.460243344138335e-05, "loss": 0.7270734310150146, "step": 3988 }, { "epoch": 4.894478527607362, "grad_norm": 0.2331819385290146, "learning_rate": 4.4599317965549836e-05, "loss": 0.7941932678222656, "step": 3989 }, { "epoch": 4.895705521472393, "grad_norm": 0.25467774271965027, "learning_rate": 4.4596201699723714e-05, "loss": 0.7110027074813843, "step": 3990 }, { "epoch": 4.896932515337423, "grad_norm": 0.25592562556266785, "learning_rate": 4.45930846440306e-05, "loss": 0.838502049446106, "step": 3991 }, { "epoch": 4.898159509202454, "grad_norm": 0.2355378270149231, "learning_rate": 4.458996679859612e-05, "loss": 0.7725694179534912, "step": 3992 }, { "epoch": 4.899386503067484, "grad_norm": 0.20849543809890747, "learning_rate": 4.4586848163545944e-05, "loss": 0.8025857210159302, "step": 3993 }, { "epoch": 4.9006134969325155, "grad_norm": 0.28045853972435, "learning_rate": 4.4583728739005795e-05, "loss": 0.5984273552894592, "step": 3994 }, { "epoch": 4.901840490797546, "grad_norm": 0.24532543122768402, "learning_rate": 4.4580608525101386e-05, "loss": 0.7493599653244019, "step": 3995 }, { "epoch": 4.903067484662577, "grad_norm": 0.24348868429660797, "learning_rate": 4.4577487521958496e-05, "loss": 0.7556779384613037, "step": 3996 }, { "epoch": 4.904294478527607, "grad_norm": 0.31250298023223877, "learning_rate": 4.457436572970292e-05, "loss": 0.6864997148513794, "step": 3997 }, { "epoch": 4.905521472392638, "grad_norm": 0.20093846321105957, "learning_rate": 4.457124314846049e-05, "loss": 0.7971725463867188, "step": 3998 }, { "epoch": 4.906748466257668, "grad_norm": 0.21251267194747925, "learning_rate": 4.4568119778357064e-05, "loss": 0.8509422540664673, "step": 3999 }, { "epoch": 4.9079754601226995, "grad_norm": 0.25092029571533203, "learning_rate": 4.4564995619518546e-05, "loss": 0.7250447273254395, "step": 4000 }, { "epoch": 4.90920245398773, "grad_norm": 0.2742142081260681, "learning_rate": 4.4561870672070854e-05, "loss": 1.0393483638763428, "step": 4001 }, { "epoch": 4.910429447852761, "grad_norm": 0.2241465002298355, "learning_rate": 4.455874493613995e-05, "loss": 0.7381301522254944, "step": 4002 }, { "epoch": 4.911656441717791, "grad_norm": 0.25874975323677063, "learning_rate": 4.455561841185182e-05, "loss": 0.676628828048706, "step": 4003 }, { "epoch": 4.912883435582822, "grad_norm": 0.25072506070137024, "learning_rate": 4.4552491099332475e-05, "loss": 0.7217844724655151, "step": 4004 }, { "epoch": 4.914110429447852, "grad_norm": 0.2378954440355301, "learning_rate": 4.4549362998707986e-05, "loss": 0.6833716630935669, "step": 4005 }, { "epoch": 4.9153374233128835, "grad_norm": 0.2600879967212677, "learning_rate": 4.454623411010444e-05, "loss": 0.6055775880813599, "step": 4006 }, { "epoch": 4.916564417177915, "grad_norm": 0.2312774360179901, "learning_rate": 4.454310443364793e-05, "loss": 0.8339767456054688, "step": 4007 }, { "epoch": 4.917791411042945, "grad_norm": 0.24048112332820892, "learning_rate": 4.453997396946463e-05, "loss": 0.7127941250801086, "step": 4008 }, { "epoch": 4.919018404907975, "grad_norm": 0.2661172151565552, "learning_rate": 4.453684271768071e-05, "loss": 0.7255386114120483, "step": 4009 }, { "epoch": 4.920245398773006, "grad_norm": 0.2560913860797882, "learning_rate": 4.453371067842237e-05, "loss": 0.8082724809646606, "step": 4010 }, { "epoch": 4.921472392638037, "grad_norm": 0.22293931245803833, "learning_rate": 4.453057785181587e-05, "loss": 0.7596002817153931, "step": 4011 }, { "epoch": 4.9226993865030675, "grad_norm": 0.24760104715824127, "learning_rate": 4.452744423798749e-05, "loss": 0.7298862934112549, "step": 4012 }, { "epoch": 4.923926380368098, "grad_norm": 0.2794748544692993, "learning_rate": 4.452430983706351e-05, "loss": 0.793419361114502, "step": 4013 }, { "epoch": 4.925153374233129, "grad_norm": 0.2655891478061676, "learning_rate": 4.452117464917029e-05, "loss": 0.6523375511169434, "step": 4014 }, { "epoch": 4.92638036809816, "grad_norm": 0.2896873354911804, "learning_rate": 4.45180386744342e-05, "loss": 0.674249529838562, "step": 4015 }, { "epoch": 4.92760736196319, "grad_norm": 0.22535504400730133, "learning_rate": 4.4514901912981644e-05, "loss": 0.8271676301956177, "step": 4016 }, { "epoch": 4.92883435582822, "grad_norm": 0.2754197120666504, "learning_rate": 4.451176436493904e-05, "loss": 0.7241553068161011, "step": 4017 }, { "epoch": 4.9300613496932515, "grad_norm": 0.22018803656101227, "learning_rate": 4.4508626030432864e-05, "loss": 0.9350385069847107, "step": 4018 }, { "epoch": 4.931288343558283, "grad_norm": 0.23786424100399017, "learning_rate": 4.450548690958962e-05, "loss": 0.7749280333518982, "step": 4019 }, { "epoch": 4.932515337423313, "grad_norm": 0.23496896028518677, "learning_rate": 4.4502347002535825e-05, "loss": 0.6601958870887756, "step": 4020 }, { "epoch": 4.933742331288344, "grad_norm": 0.2196117639541626, "learning_rate": 4.449920630939805e-05, "loss": 0.747092604637146, "step": 4021 }, { "epoch": 4.934969325153374, "grad_norm": 0.23438312113285065, "learning_rate": 4.449606483030288e-05, "loss": 0.9391225576400757, "step": 4022 }, { "epoch": 4.936196319018405, "grad_norm": 0.23520678281784058, "learning_rate": 4.449292256537693e-05, "loss": 0.7454577684402466, "step": 4023 }, { "epoch": 4.9374233128834355, "grad_norm": 0.26538777351379395, "learning_rate": 4.4489779514746874e-05, "loss": 0.6813380718231201, "step": 4024 }, { "epoch": 4.938650306748467, "grad_norm": 0.22891049087047577, "learning_rate": 4.44866356785394e-05, "loss": 0.7595347762107849, "step": 4025 }, { "epoch": 4.939877300613497, "grad_norm": 0.23000910878181458, "learning_rate": 4.448349105688121e-05, "loss": 0.718478262424469, "step": 4026 }, { "epoch": 4.941104294478528, "grad_norm": 0.2264232337474823, "learning_rate": 4.448034564989907e-05, "loss": 0.7527478337287903, "step": 4027 }, { "epoch": 4.942331288343558, "grad_norm": 0.27548736333847046, "learning_rate": 4.447719945771975e-05, "loss": 0.8147604465484619, "step": 4028 }, { "epoch": 4.943558282208589, "grad_norm": 0.27491295337677, "learning_rate": 4.447405248047008e-05, "loss": 0.7471411228179932, "step": 4029 }, { "epoch": 4.9447852760736195, "grad_norm": 0.18368352949619293, "learning_rate": 4.44709047182769e-05, "loss": 0.8906035423278809, "step": 4030 }, { "epoch": 4.946012269938651, "grad_norm": 0.21899160742759705, "learning_rate": 4.446775617126707e-05, "loss": 0.8031107187271118, "step": 4031 }, { "epoch": 4.947239263803681, "grad_norm": 0.17986084520816803, "learning_rate": 4.4464606839567515e-05, "loss": 0.894313395023346, "step": 4032 }, { "epoch": 4.948466257668712, "grad_norm": 0.2184220254421234, "learning_rate": 4.446145672330517e-05, "loss": 0.9267144799232483, "step": 4033 }, { "epoch": 4.949693251533742, "grad_norm": 0.21914687752723694, "learning_rate": 4.445830582260702e-05, "loss": 0.9235020875930786, "step": 4034 }, { "epoch": 4.950920245398773, "grad_norm": 0.2856614887714386, "learning_rate": 4.445515413760005e-05, "loss": 0.7416479587554932, "step": 4035 }, { "epoch": 4.9521472392638035, "grad_norm": 0.21215838193893433, "learning_rate": 4.4452001668411314e-05, "loss": 0.8053983449935913, "step": 4036 }, { "epoch": 4.953374233128835, "grad_norm": 0.24970179796218872, "learning_rate": 4.4448848415167865e-05, "loss": 0.6957414746284485, "step": 4037 }, { "epoch": 4.954601226993865, "grad_norm": 0.17413374781608582, "learning_rate": 4.4445694377996805e-05, "loss": 0.9528447389602661, "step": 4038 }, { "epoch": 4.955828220858896, "grad_norm": 0.24906161427497864, "learning_rate": 4.444253955702527e-05, "loss": 0.7602421045303345, "step": 4039 }, { "epoch": 4.957055214723926, "grad_norm": 0.22390437126159668, "learning_rate": 4.443938395238041e-05, "loss": 0.8332961797714233, "step": 4040 }, { "epoch": 4.958282208588957, "grad_norm": 0.27542465925216675, "learning_rate": 4.4436227564189436e-05, "loss": 0.5725491046905518, "step": 4041 }, { "epoch": 4.9595092024539875, "grad_norm": 0.27162137627601624, "learning_rate": 4.4433070392579555e-05, "loss": 0.7042875289916992, "step": 4042 }, { "epoch": 4.960736196319019, "grad_norm": 0.21106567978858948, "learning_rate": 4.442991243767804e-05, "loss": 0.7691018581390381, "step": 4043 }, { "epoch": 4.961963190184049, "grad_norm": 0.25945061445236206, "learning_rate": 4.442675369961215e-05, "loss": 0.5522743463516235, "step": 4044 }, { "epoch": 4.96319018404908, "grad_norm": 0.29221490025520325, "learning_rate": 4.442359417850924e-05, "loss": 0.8690978288650513, "step": 4045 }, { "epoch": 4.96441717791411, "grad_norm": 0.2600163221359253, "learning_rate": 4.4420433874496645e-05, "loss": 0.6710373759269714, "step": 4046 }, { "epoch": 4.965644171779141, "grad_norm": 0.2562808692455292, "learning_rate": 4.4417272787701755e-05, "loss": 0.7809675931930542, "step": 4047 }, { "epoch": 4.9668711656441715, "grad_norm": 0.23054073750972748, "learning_rate": 4.441411091825197e-05, "loss": 0.7151111960411072, "step": 4048 }, { "epoch": 4.968098159509203, "grad_norm": 0.23177821934223175, "learning_rate": 4.441094826627475e-05, "loss": 0.6956559419631958, "step": 4049 }, { "epoch": 4.969325153374233, "grad_norm": 0.24300000071525574, "learning_rate": 4.440778483189756e-05, "loss": 0.6700619459152222, "step": 4050 }, { "epoch": 4.970552147239264, "grad_norm": 0.23719806969165802, "learning_rate": 4.4404620615247924e-05, "loss": 0.8165313005447388, "step": 4051 }, { "epoch": 4.971779141104294, "grad_norm": 0.2619728744029999, "learning_rate": 4.440145561645337e-05, "loss": 0.6797406673431396, "step": 4052 }, { "epoch": 4.973006134969325, "grad_norm": 0.2781981825828552, "learning_rate": 4.439828983564148e-05, "loss": 0.7225083112716675, "step": 4053 }, { "epoch": 4.9742331288343555, "grad_norm": 0.1957462877035141, "learning_rate": 4.439512327293985e-05, "loss": 0.7574698328971863, "step": 4054 }, { "epoch": 4.975460122699387, "grad_norm": 0.2442907840013504, "learning_rate": 4.4391955928476115e-05, "loss": 0.7697187662124634, "step": 4055 }, { "epoch": 4.976687116564417, "grad_norm": 0.20455507934093475, "learning_rate": 4.438878780237795e-05, "loss": 0.9237056970596313, "step": 4056 }, { "epoch": 4.977914110429448, "grad_norm": 0.24803832173347473, "learning_rate": 4.438561889477304e-05, "loss": 0.8113667368888855, "step": 4057 }, { "epoch": 4.979141104294478, "grad_norm": 0.18629314005374908, "learning_rate": 4.4382449205789126e-05, "loss": 0.9420206546783447, "step": 4058 }, { "epoch": 4.980368098159509, "grad_norm": 0.24971584975719452, "learning_rate": 4.437927873555396e-05, "loss": 0.7399328947067261, "step": 4059 }, { "epoch": 4.9815950920245395, "grad_norm": 0.23114252090454102, "learning_rate": 4.437610748419534e-05, "loss": 0.8499451279640198, "step": 4060 }, { "epoch": 4.982822085889571, "grad_norm": 0.23308353126049042, "learning_rate": 4.4372935451841105e-05, "loss": 0.7555267810821533, "step": 4061 }, { "epoch": 4.984049079754601, "grad_norm": 0.2676772177219391, "learning_rate": 4.4369762638619074e-05, "loss": 0.6716222763061523, "step": 4062 }, { "epoch": 4.985276073619632, "grad_norm": 0.22313079237937927, "learning_rate": 4.436658904465716e-05, "loss": 0.7667389512062073, "step": 4063 }, { "epoch": 4.986503067484662, "grad_norm": 0.19147427380084991, "learning_rate": 4.4363414670083287e-05, "loss": 0.7981531620025635, "step": 4064 }, { "epoch": 4.987730061349693, "grad_norm": 0.2494264394044876, "learning_rate": 4.436023951502539e-05, "loss": 0.8118877410888672, "step": 4065 }, { "epoch": 4.9889570552147235, "grad_norm": 0.2679292559623718, "learning_rate": 4.435706357961146e-05, "loss": 0.8946903347969055, "step": 4066 }, { "epoch": 4.990184049079755, "grad_norm": 0.26069527864456177, "learning_rate": 4.43538868639695e-05, "loss": 0.7615999579429626, "step": 4067 }, { "epoch": 4.991411042944785, "grad_norm": 0.23257730901241302, "learning_rate": 4.435070936822755e-05, "loss": 0.8605740070343018, "step": 4068 }, { "epoch": 4.992638036809816, "grad_norm": 0.2723582088947296, "learning_rate": 4.4347531092513704e-05, "loss": 0.7616180181503296, "step": 4069 }, { "epoch": 4.993865030674847, "grad_norm": 0.23525628447532654, "learning_rate": 4.4344352036956065e-05, "loss": 0.9084270596504211, "step": 4070 }, { "epoch": 4.995092024539877, "grad_norm": 0.21219640970230103, "learning_rate": 4.4341172201682765e-05, "loss": 0.8463982939720154, "step": 4071 }, { "epoch": 4.9963190184049076, "grad_norm": 0.26151254773139954, "learning_rate": 4.4337991586821976e-05, "loss": 0.7138129472732544, "step": 4072 }, { "epoch": 4.997546012269939, "grad_norm": 0.30091822147369385, "learning_rate": 4.433481019250189e-05, "loss": 0.6776535511016846, "step": 4073 }, { "epoch": 4.99877300613497, "grad_norm": 0.21587172150611877, "learning_rate": 4.433162801885076e-05, "loss": 0.7743576765060425, "step": 4074 }, { "epoch": 5.0, "grad_norm": 0.22505894303321838, "learning_rate": 4.432844506599684e-05, "loss": 0.8601467609405518, "step": 4075 }, { "epoch": 5.001226993865031, "grad_norm": 0.20578746497631073, "learning_rate": 4.4325261334068426e-05, "loss": 0.6895430088043213, "step": 4076 }, { "epoch": 5.002453987730061, "grad_norm": 0.2290179431438446, "learning_rate": 4.432207682319385e-05, "loss": 0.6919165253639221, "step": 4077 }, { "epoch": 5.0036809815950924, "grad_norm": 0.22368229925632477, "learning_rate": 4.431889153350145e-05, "loss": 0.6276520490646362, "step": 4078 }, { "epoch": 5.004907975460123, "grad_norm": 0.21003012359142303, "learning_rate": 4.431570546511964e-05, "loss": 0.8230520486831665, "step": 4079 }, { "epoch": 5.006134969325154, "grad_norm": 0.2211989164352417, "learning_rate": 4.431251861817684e-05, "loss": 0.8247998356819153, "step": 4080 }, { "epoch": 5.007361963190184, "grad_norm": 0.24872277677059174, "learning_rate": 4.4309330992801485e-05, "loss": 0.5250504016876221, "step": 4081 }, { "epoch": 5.008588957055215, "grad_norm": 0.24871762096881866, "learning_rate": 4.430614258912207e-05, "loss": 0.6683721542358398, "step": 4082 }, { "epoch": 5.009815950920245, "grad_norm": 0.24192370474338531, "learning_rate": 4.430295340726712e-05, "loss": 0.7576694488525391, "step": 4083 }, { "epoch": 5.0110429447852765, "grad_norm": 0.2587743103504181, "learning_rate": 4.429976344736516e-05, "loss": 0.7042916417121887, "step": 4084 }, { "epoch": 5.012269938650307, "grad_norm": 0.24637001752853394, "learning_rate": 4.4296572709544784e-05, "loss": 0.7569793462753296, "step": 4085 }, { "epoch": 5.013496932515338, "grad_norm": 0.2237885743379593, "learning_rate": 4.4293381193934603e-05, "loss": 0.7225300073623657, "step": 4086 }, { "epoch": 5.014723926380368, "grad_norm": 0.2744157314300537, "learning_rate": 4.4290188900663244e-05, "loss": 0.7330000400543213, "step": 4087 }, { "epoch": 5.015950920245399, "grad_norm": 0.2642616033554077, "learning_rate": 4.428699582985939e-05, "loss": 0.6955112218856812, "step": 4088 }, { "epoch": 5.017177914110429, "grad_norm": 0.19714833796024323, "learning_rate": 4.428380198165175e-05, "loss": 0.8032294511795044, "step": 4089 }, { "epoch": 5.0184049079754605, "grad_norm": 0.22652147710323334, "learning_rate": 4.428060735616904e-05, "loss": 0.8533221483230591, "step": 4090 }, { "epoch": 5.019631901840491, "grad_norm": 0.2407950609922409, "learning_rate": 4.427741195354005e-05, "loss": 0.8400650024414062, "step": 4091 }, { "epoch": 5.020858895705522, "grad_norm": 0.2360808402299881, "learning_rate": 4.4274215773893554e-05, "loss": 0.6631039381027222, "step": 4092 }, { "epoch": 5.022085889570552, "grad_norm": 0.2306337058544159, "learning_rate": 4.42710188173584e-05, "loss": 0.6192233562469482, "step": 4093 }, { "epoch": 5.023312883435583, "grad_norm": 0.22307626903057098, "learning_rate": 4.426782108406343e-05, "loss": 0.7683106064796448, "step": 4094 }, { "epoch": 5.024539877300613, "grad_norm": 0.2420169711112976, "learning_rate": 4.426462257413756e-05, "loss": 0.8621127605438232, "step": 4095 }, { "epoch": 5.0257668711656445, "grad_norm": 0.20747531950473785, "learning_rate": 4.426142328770969e-05, "loss": 0.9370846748352051, "step": 4096 }, { "epoch": 5.026993865030675, "grad_norm": 0.26254016160964966, "learning_rate": 4.425822322490878e-05, "loss": 0.6706368327140808, "step": 4097 }, { "epoch": 5.028220858895706, "grad_norm": 0.2544268071651459, "learning_rate": 4.425502238586382e-05, "loss": 0.6435837745666504, "step": 4098 }, { "epoch": 5.029447852760736, "grad_norm": 0.2137855887413025, "learning_rate": 4.4251820770703824e-05, "loss": 0.8987761735916138, "step": 4099 }, { "epoch": 5.030674846625767, "grad_norm": 0.2943600118160248, "learning_rate": 4.4248618379557847e-05, "loss": 0.6939866542816162, "step": 4100 }, { "epoch": 5.031901840490797, "grad_norm": 0.2709823548793793, "learning_rate": 4.424541521255495e-05, "loss": 0.6136292815208435, "step": 4101 }, { "epoch": 5.0331288343558285, "grad_norm": 0.2563098967075348, "learning_rate": 4.424221126982426e-05, "loss": 0.6954342126846313, "step": 4102 }, { "epoch": 5.034355828220859, "grad_norm": 0.245743066072464, "learning_rate": 4.423900655149492e-05, "loss": 0.6785447597503662, "step": 4103 }, { "epoch": 5.03558282208589, "grad_norm": 0.2013503760099411, "learning_rate": 4.42358010576961e-05, "loss": 0.8096430897712708, "step": 4104 }, { "epoch": 5.03680981595092, "grad_norm": 0.2349434643983841, "learning_rate": 4.423259478855699e-05, "loss": 0.7968325614929199, "step": 4105 }, { "epoch": 5.038036809815951, "grad_norm": 0.29600781202316284, "learning_rate": 4.4229387744206845e-05, "loss": 0.5827493667602539, "step": 4106 }, { "epoch": 5.039263803680981, "grad_norm": 0.19632647931575775, "learning_rate": 4.422617992477492e-05, "loss": 0.8990049958229065, "step": 4107 }, { "epoch": 5.0404907975460125, "grad_norm": 0.2064092606306076, "learning_rate": 4.422297133039052e-05, "loss": 0.9538587331771851, "step": 4108 }, { "epoch": 5.041717791411043, "grad_norm": 0.21853022277355194, "learning_rate": 4.421976196118297e-05, "loss": 0.6754975318908691, "step": 4109 }, { "epoch": 5.042944785276074, "grad_norm": 0.20572082698345184, "learning_rate": 4.421655181728163e-05, "loss": 0.656093955039978, "step": 4110 }, { "epoch": 5.044171779141104, "grad_norm": 0.19962161779403687, "learning_rate": 4.42133408988159e-05, "loss": 0.8190485239028931, "step": 4111 }, { "epoch": 5.045398773006135, "grad_norm": 0.3286410868167877, "learning_rate": 4.421012920591519e-05, "loss": 0.5497112274169922, "step": 4112 }, { "epoch": 5.046625766871165, "grad_norm": 0.3042750954627991, "learning_rate": 4.420691673870896e-05, "loss": 0.6206434369087219, "step": 4113 }, { "epoch": 5.0478527607361965, "grad_norm": 0.22490744292736053, "learning_rate": 4.42037034973267e-05, "loss": 0.870556116104126, "step": 4114 }, { "epoch": 5.049079754601227, "grad_norm": 0.22870180010795593, "learning_rate": 4.420048948189792e-05, "loss": 0.8594549894332886, "step": 4115 }, { "epoch": 5.050306748466258, "grad_norm": 0.21252091228961945, "learning_rate": 4.4197274692552176e-05, "loss": 0.7288733124732971, "step": 4116 }, { "epoch": 5.051533742331288, "grad_norm": 0.2685854732990265, "learning_rate": 4.4194059129419044e-05, "loss": 0.5858302712440491, "step": 4117 }, { "epoch": 5.052760736196319, "grad_norm": 0.33573052287101746, "learning_rate": 4.4190842792628115e-05, "loss": 0.6358706951141357, "step": 4118 }, { "epoch": 5.053987730061349, "grad_norm": 0.251171350479126, "learning_rate": 4.4187625682309066e-05, "loss": 0.7530840039253235, "step": 4119 }, { "epoch": 5.0552147239263805, "grad_norm": 0.25744694471359253, "learning_rate": 4.418440779859153e-05, "loss": 0.7274382710456848, "step": 4120 }, { "epoch": 5.056441717791411, "grad_norm": 0.2117602527141571, "learning_rate": 4.4181189141605253e-05, "loss": 0.6068716049194336, "step": 4121 }, { "epoch": 5.057668711656442, "grad_norm": 0.21179375052452087, "learning_rate": 4.417796971147994e-05, "loss": 0.8707982897758484, "step": 4122 }, { "epoch": 5.058895705521472, "grad_norm": 0.2462919056415558, "learning_rate": 4.4174749508345356e-05, "loss": 0.646386981010437, "step": 4123 }, { "epoch": 5.060122699386503, "grad_norm": 0.21002893149852753, "learning_rate": 4.417152853233132e-05, "loss": 0.8370656967163086, "step": 4124 }, { "epoch": 5.061349693251533, "grad_norm": 0.24298438429832458, "learning_rate": 4.416830678356764e-05, "loss": 0.7991474866867065, "step": 4125 }, { "epoch": 5.0625766871165645, "grad_norm": 0.2917458117008209, "learning_rate": 4.4165084262184186e-05, "loss": 0.6869657039642334, "step": 4126 }, { "epoch": 5.063803680981595, "grad_norm": 0.2161005586385727, "learning_rate": 4.416186096831084e-05, "loss": 0.9219179153442383, "step": 4127 }, { "epoch": 5.065030674846626, "grad_norm": 0.2721659541130066, "learning_rate": 4.4158636902077545e-05, "loss": 0.7094687819480896, "step": 4128 }, { "epoch": 5.066257668711656, "grad_norm": 0.24151019752025604, "learning_rate": 4.415541206361422e-05, "loss": 0.7523177862167358, "step": 4129 }, { "epoch": 5.067484662576687, "grad_norm": 0.2216823697090149, "learning_rate": 4.415218645305088e-05, "loss": 0.8655699491500854, "step": 4130 }, { "epoch": 5.068711656441717, "grad_norm": 0.2302679568529129, "learning_rate": 4.414896007051752e-05, "loss": 0.7054004669189453, "step": 4131 }, { "epoch": 5.0699386503067485, "grad_norm": 0.2899165153503418, "learning_rate": 4.41457329161442e-05, "loss": 0.6285661458969116, "step": 4132 }, { "epoch": 5.071165644171779, "grad_norm": 0.20011192560195923, "learning_rate": 4.4142504990061e-05, "loss": 0.8246632814407349, "step": 4133 }, { "epoch": 5.07239263803681, "grad_norm": 0.24412329494953156, "learning_rate": 4.4139276292398e-05, "loss": 0.8332362174987793, "step": 4134 }, { "epoch": 5.07361963190184, "grad_norm": 0.22317281365394592, "learning_rate": 4.4136046823285374e-05, "loss": 0.7611685991287231, "step": 4135 }, { "epoch": 5.074846625766871, "grad_norm": 0.30398061871528625, "learning_rate": 4.4132816582853274e-05, "loss": 0.5215960741043091, "step": 4136 }, { "epoch": 5.076073619631902, "grad_norm": 0.224591463804245, "learning_rate": 4.412958557123191e-05, "loss": 0.7706037759780884, "step": 4137 }, { "epoch": 5.0773006134969325, "grad_norm": 0.24414385855197906, "learning_rate": 4.41263537885515e-05, "loss": 0.6920664310455322, "step": 4138 }, { "epoch": 5.078527607361964, "grad_norm": 0.21417061984539032, "learning_rate": 4.412312123494233e-05, "loss": 0.7730706930160522, "step": 4139 }, { "epoch": 5.079754601226994, "grad_norm": 0.20634357631206512, "learning_rate": 4.411988791053468e-05, "loss": 0.8968856930732727, "step": 4140 }, { "epoch": 5.080981595092025, "grad_norm": 0.31797948479652405, "learning_rate": 4.4116653815458875e-05, "loss": 0.6503862142562866, "step": 4141 }, { "epoch": 5.082208588957055, "grad_norm": 0.2653987407684326, "learning_rate": 4.4113418949845284e-05, "loss": 0.8090676069259644, "step": 4142 }, { "epoch": 5.083435582822086, "grad_norm": 0.1973385512828827, "learning_rate": 4.411018331382428e-05, "loss": 0.7764863967895508, "step": 4143 }, { "epoch": 5.0846625766871165, "grad_norm": 0.2692280411720276, "learning_rate": 4.41069469075263e-05, "loss": 0.7094719409942627, "step": 4144 }, { "epoch": 5.085889570552148, "grad_norm": 0.22913037240505219, "learning_rate": 4.410370973108179e-05, "loss": 0.8475584983825684, "step": 4145 }, { "epoch": 5.087116564417178, "grad_norm": 0.2200450748205185, "learning_rate": 4.410047178462121e-05, "loss": 0.6940253973007202, "step": 4146 }, { "epoch": 5.088343558282209, "grad_norm": 0.24996121227741241, "learning_rate": 4.40972330682751e-05, "loss": 0.8080329895019531, "step": 4147 }, { "epoch": 5.089570552147239, "grad_norm": 0.23904022574424744, "learning_rate": 4.409399358217399e-05, "loss": 0.6993573904037476, "step": 4148 }, { "epoch": 5.09079754601227, "grad_norm": 0.22655721008777618, "learning_rate": 4.4090753326448456e-05, "loss": 0.7099610567092896, "step": 4149 }, { "epoch": 5.0920245398773005, "grad_norm": 0.2752433717250824, "learning_rate": 4.40875123012291e-05, "loss": 0.6475374698638916, "step": 4150 }, { "epoch": 5.093251533742332, "grad_norm": 0.1944740116596222, "learning_rate": 4.408427050664656e-05, "loss": 0.6815102696418762, "step": 4151 }, { "epoch": 5.094478527607362, "grad_norm": 0.21675588190555573, "learning_rate": 4.4081027942831514e-05, "loss": 0.8084523677825928, "step": 4152 }, { "epoch": 5.095705521472393, "grad_norm": 0.18821114301681519, "learning_rate": 4.407778460991465e-05, "loss": 0.8031030297279358, "step": 4153 }, { "epoch": 5.096932515337423, "grad_norm": 0.22074449062347412, "learning_rate": 4.40745405080267e-05, "loss": 0.7768094539642334, "step": 4154 }, { "epoch": 5.098159509202454, "grad_norm": 0.22474028170108795, "learning_rate": 4.407129563729843e-05, "loss": 0.7410145998001099, "step": 4155 }, { "epoch": 5.0993865030674845, "grad_norm": 0.24181699752807617, "learning_rate": 4.4068049997860614e-05, "loss": 0.7634725570678711, "step": 4156 }, { "epoch": 5.100613496932516, "grad_norm": 0.22870594263076782, "learning_rate": 4.406480358984409e-05, "loss": 0.7059310674667358, "step": 4157 }, { "epoch": 5.101840490797546, "grad_norm": 0.20239219069480896, "learning_rate": 4.406155641337971e-05, "loss": 0.8487426042556763, "step": 4158 }, { "epoch": 5.103067484662577, "grad_norm": 0.2230139821767807, "learning_rate": 4.405830846859836e-05, "loss": 0.5760300159454346, "step": 4159 }, { "epoch": 5.104294478527607, "grad_norm": 0.2516365051269531, "learning_rate": 4.405505975563094e-05, "loss": 0.5941361784934998, "step": 4160 }, { "epoch": 5.105521472392638, "grad_norm": 0.24807427823543549, "learning_rate": 4.405181027460842e-05, "loss": 0.7287515997886658, "step": 4161 }, { "epoch": 5.1067484662576685, "grad_norm": 0.2595694363117218, "learning_rate": 4.4048560025661765e-05, "loss": 0.7285478115081787, "step": 4162 }, { "epoch": 5.1079754601227, "grad_norm": 0.2414848953485489, "learning_rate": 4.4045309008921975e-05, "loss": 0.6541873812675476, "step": 4163 }, { "epoch": 5.10920245398773, "grad_norm": 0.26555147767066956, "learning_rate": 4.404205722452011e-05, "loss": 0.7679531574249268, "step": 4164 }, { "epoch": 5.110429447852761, "grad_norm": 0.25397050380706787, "learning_rate": 4.403880467258722e-05, "loss": 0.7946916818618774, "step": 4165 }, { "epoch": 5.111656441717791, "grad_norm": 0.1954001933336258, "learning_rate": 4.4035551353254415e-05, "loss": 0.8571412563323975, "step": 4166 }, { "epoch": 5.112883435582822, "grad_norm": 0.30114641785621643, "learning_rate": 4.403229726665282e-05, "loss": 0.5834059715270996, "step": 4167 }, { "epoch": 5.1141104294478525, "grad_norm": 0.2177446186542511, "learning_rate": 4.402904241291361e-05, "loss": 0.7590806484222412, "step": 4168 }, { "epoch": 5.115337423312884, "grad_norm": 0.2353230118751526, "learning_rate": 4.402578679216797e-05, "loss": 0.7135342955589294, "step": 4169 }, { "epoch": 5.116564417177914, "grad_norm": 0.28203633427619934, "learning_rate": 4.402253040454713e-05, "loss": 0.47637301683425903, "step": 4170 }, { "epoch": 5.117791411042945, "grad_norm": 0.22229428589344025, "learning_rate": 4.401927325018235e-05, "loss": 0.8246301412582397, "step": 4171 }, { "epoch": 5.119018404907975, "grad_norm": 0.24544507265090942, "learning_rate": 4.40160153292049e-05, "loss": 0.6116777062416077, "step": 4172 }, { "epoch": 5.120245398773006, "grad_norm": 0.20864614844322205, "learning_rate": 4.401275664174611e-05, "loss": 1.0198135375976562, "step": 4173 }, { "epoch": 5.1214723926380366, "grad_norm": 0.21362894773483276, "learning_rate": 4.4009497187937324e-05, "loss": 0.8526793718338013, "step": 4174 }, { "epoch": 5.122699386503068, "grad_norm": 0.24966445565223694, "learning_rate": 4.400623696790993e-05, "loss": 0.8251374959945679, "step": 4175 }, { "epoch": 5.123926380368098, "grad_norm": 0.29662129282951355, "learning_rate": 4.4002975981795324e-05, "loss": 0.7696837186813354, "step": 4176 }, { "epoch": 5.125153374233129, "grad_norm": 0.21630729734897614, "learning_rate": 4.399971422972496e-05, "loss": 0.9757575988769531, "step": 4177 }, { "epoch": 5.126380368098159, "grad_norm": 0.24196209013462067, "learning_rate": 4.39964517118303e-05, "loss": 0.7195355892181396, "step": 4178 }, { "epoch": 5.12760736196319, "grad_norm": 0.22682449221611023, "learning_rate": 4.399318842824285e-05, "loss": 0.7079248428344727, "step": 4179 }, { "epoch": 5.128834355828221, "grad_norm": 0.27343979477882385, "learning_rate": 4.398992437909415e-05, "loss": 0.6890258193016052, "step": 4180 }, { "epoch": 5.130061349693252, "grad_norm": 0.285995751619339, "learning_rate": 4.398665956451575e-05, "loss": 0.701263427734375, "step": 4181 }, { "epoch": 5.131288343558282, "grad_norm": 0.23955006897449493, "learning_rate": 4.398339398463927e-05, "loss": 0.6131929159164429, "step": 4182 }, { "epoch": 5.132515337423313, "grad_norm": 0.22691379487514496, "learning_rate": 4.3980127639596316e-05, "loss": 0.845821738243103, "step": 4183 }, { "epoch": 5.133742331288343, "grad_norm": 0.2365681529045105, "learning_rate": 4.397686052951854e-05, "loss": 0.7715795040130615, "step": 4184 }, { "epoch": 5.134969325153374, "grad_norm": 0.32087764143943787, "learning_rate": 4.397359265453764e-05, "loss": 0.6094650626182556, "step": 4185 }, { "epoch": 5.136196319018405, "grad_norm": 0.23627373576164246, "learning_rate": 4.397032401478535e-05, "loss": 0.820612907409668, "step": 4186 }, { "epoch": 5.137423312883436, "grad_norm": 0.24825990200042725, "learning_rate": 4.396705461039339e-05, "loss": 0.7944272756576538, "step": 4187 }, { "epoch": 5.138650306748466, "grad_norm": 0.2697121202945709, "learning_rate": 4.396378444149356e-05, "loss": 1.0745189189910889, "step": 4188 }, { "epoch": 5.139877300613497, "grad_norm": 0.24224722385406494, "learning_rate": 4.396051350821766e-05, "loss": 0.7606372833251953, "step": 4189 }, { "epoch": 5.141104294478527, "grad_norm": 0.2111661285161972, "learning_rate": 4.395724181069755e-05, "loss": 0.7981828451156616, "step": 4190 }, { "epoch": 5.142331288343558, "grad_norm": 0.29457125067710876, "learning_rate": 4.395396934906507e-05, "loss": 0.7042727470397949, "step": 4191 }, { "epoch": 5.143558282208589, "grad_norm": 0.23240616917610168, "learning_rate": 4.395069612345216e-05, "loss": 0.7630959749221802, "step": 4192 }, { "epoch": 5.14478527607362, "grad_norm": 0.2609594464302063, "learning_rate": 4.394742213399073e-05, "loss": 0.7038618922233582, "step": 4193 }, { "epoch": 5.14601226993865, "grad_norm": 0.23387011885643005, "learning_rate": 4.394414738081276e-05, "loss": 0.8054525852203369, "step": 4194 }, { "epoch": 5.147239263803681, "grad_norm": 0.24151703715324402, "learning_rate": 4.394087186405024e-05, "loss": 0.8020442724227905, "step": 4195 }, { "epoch": 5.148466257668711, "grad_norm": 0.24741467833518982, "learning_rate": 4.393759558383519e-05, "loss": 0.7440369129180908, "step": 4196 }, { "epoch": 5.149693251533742, "grad_norm": 0.25990933179855347, "learning_rate": 4.393431854029968e-05, "loss": 0.7276612520217896, "step": 4197 }, { "epoch": 5.150920245398773, "grad_norm": 0.3106139898300171, "learning_rate": 4.393104073357579e-05, "loss": 0.5323446989059448, "step": 4198 }, { "epoch": 5.152147239263804, "grad_norm": 0.21633468568325043, "learning_rate": 4.392776216379564e-05, "loss": 0.8747013807296753, "step": 4199 }, { "epoch": 5.153374233128835, "grad_norm": 0.23921215534210205, "learning_rate": 4.392448283109139e-05, "loss": 0.6585037708282471, "step": 4200 }, { "epoch": 5.154601226993865, "grad_norm": 0.23843827843666077, "learning_rate": 4.392120273559519e-05, "loss": 0.7215925455093384, "step": 4201 }, { "epoch": 5.155828220858895, "grad_norm": 0.23021993041038513, "learning_rate": 4.391792187743929e-05, "loss": 0.6535534262657166, "step": 4202 }, { "epoch": 5.157055214723926, "grad_norm": 0.24905350804328918, "learning_rate": 4.391464025675591e-05, "loss": 0.6802381873130798, "step": 4203 }, { "epoch": 5.1582822085889575, "grad_norm": 0.27841636538505554, "learning_rate": 4.391135787367733e-05, "loss": 0.727125346660614, "step": 4204 }, { "epoch": 5.159509202453988, "grad_norm": 0.18111665546894073, "learning_rate": 4.3908074728335845e-05, "loss": 0.8661786317825317, "step": 4205 }, { "epoch": 5.160736196319019, "grad_norm": 0.22353118658065796, "learning_rate": 4.39047908208638e-05, "loss": 0.7630064487457275, "step": 4206 }, { "epoch": 5.161963190184049, "grad_norm": 0.2372097223997116, "learning_rate": 4.3901506151393556e-05, "loss": 0.715969979763031, "step": 4207 }, { "epoch": 5.16319018404908, "grad_norm": 0.22634972631931305, "learning_rate": 4.3898220720057505e-05, "loss": 0.9063512682914734, "step": 4208 }, { "epoch": 5.16441717791411, "grad_norm": 0.2618541717529297, "learning_rate": 4.389493452698807e-05, "loss": 0.6693446636199951, "step": 4209 }, { "epoch": 5.1656441717791415, "grad_norm": 0.19072583317756653, "learning_rate": 4.3891647572317727e-05, "loss": 0.858405590057373, "step": 4210 }, { "epoch": 5.166871165644172, "grad_norm": 0.23822352290153503, "learning_rate": 4.3888359856178944e-05, "loss": 0.7089948058128357, "step": 4211 }, { "epoch": 5.168098159509203, "grad_norm": 0.21075651049613953, "learning_rate": 4.388507137870425e-05, "loss": 0.6393947601318359, "step": 4212 }, { "epoch": 5.169325153374233, "grad_norm": 0.25843456387519836, "learning_rate": 4.388178214002618e-05, "loss": 0.8052080273628235, "step": 4213 }, { "epoch": 5.170552147239264, "grad_norm": 0.20712998509407043, "learning_rate": 4.387849214027734e-05, "loss": 0.7494384050369263, "step": 4214 }, { "epoch": 5.171779141104294, "grad_norm": 0.22590897977352142, "learning_rate": 4.387520137959031e-05, "loss": 0.8946810960769653, "step": 4215 }, { "epoch": 5.1730061349693255, "grad_norm": 0.25797510147094727, "learning_rate": 4.387190985809776e-05, "loss": 0.6720937490463257, "step": 4216 }, { "epoch": 5.174233128834356, "grad_norm": 0.24086806178092957, "learning_rate": 4.386861757593235e-05, "loss": 0.7070193290710449, "step": 4217 }, { "epoch": 5.175460122699387, "grad_norm": 0.24856828153133392, "learning_rate": 4.386532453322676e-05, "loss": 0.6778696775436401, "step": 4218 }, { "epoch": 5.176687116564417, "grad_norm": 0.2468341439962387, "learning_rate": 4.3862030730113754e-05, "loss": 0.6734537482261658, "step": 4219 }, { "epoch": 5.177914110429448, "grad_norm": 0.19378270208835602, "learning_rate": 4.3858736166726095e-05, "loss": 0.8770575523376465, "step": 4220 }, { "epoch": 5.179141104294478, "grad_norm": 0.25982192158699036, "learning_rate": 4.385544084319656e-05, "loss": 0.7112207412719727, "step": 4221 }, { "epoch": 5.1803680981595095, "grad_norm": 0.2676723301410675, "learning_rate": 4.385214475965798e-05, "loss": 0.8535661101341248, "step": 4222 }, { "epoch": 5.18159509202454, "grad_norm": 0.19073918461799622, "learning_rate": 4.384884791624322e-05, "loss": 0.7948441505432129, "step": 4223 }, { "epoch": 5.182822085889571, "grad_norm": 0.2833673357963562, "learning_rate": 4.384555031308515e-05, "loss": 0.6726750731468201, "step": 4224 }, { "epoch": 5.184049079754601, "grad_norm": 0.26534491777420044, "learning_rate": 4.384225195031671e-05, "loss": 0.597529411315918, "step": 4225 }, { "epoch": 5.185276073619632, "grad_norm": 0.2583802342414856, "learning_rate": 4.3838952828070814e-05, "loss": 0.7653685808181763, "step": 4226 }, { "epoch": 5.186503067484662, "grad_norm": 0.23095348477363586, "learning_rate": 4.383565294648047e-05, "loss": 0.7738152742385864, "step": 4227 }, { "epoch": 5.1877300613496935, "grad_norm": 0.26903533935546875, "learning_rate": 4.383235230567868e-05, "loss": 0.7655706405639648, "step": 4228 }, { "epoch": 5.188957055214724, "grad_norm": 0.294289767742157, "learning_rate": 4.3829050905798474e-05, "loss": 0.6140145063400269, "step": 4229 }, { "epoch": 5.190184049079755, "grad_norm": 0.1989278495311737, "learning_rate": 4.3825748746972926e-05, "loss": 0.7609699964523315, "step": 4230 }, { "epoch": 5.191411042944785, "grad_norm": 0.20525391399860382, "learning_rate": 4.382244582933515e-05, "loss": 0.820356011390686, "step": 4231 }, { "epoch": 5.192638036809816, "grad_norm": 0.24151909351348877, "learning_rate": 4.3819142153018254e-05, "loss": 0.8481144905090332, "step": 4232 }, { "epoch": 5.193865030674846, "grad_norm": 0.23647932708263397, "learning_rate": 4.3815837718155423e-05, "loss": 0.7450767755508423, "step": 4233 }, { "epoch": 5.1950920245398775, "grad_norm": 0.3296220898628235, "learning_rate": 4.381253252487984e-05, "loss": 0.609542727470398, "step": 4234 }, { "epoch": 5.196319018404908, "grad_norm": 0.19041411578655243, "learning_rate": 4.3809226573324704e-05, "loss": 0.777566134929657, "step": 4235 }, { "epoch": 5.197546012269939, "grad_norm": 0.27432405948638916, "learning_rate": 4.380591986362331e-05, "loss": 0.6833937168121338, "step": 4236 }, { "epoch": 5.198773006134969, "grad_norm": 0.22762814164161682, "learning_rate": 4.380261239590892e-05, "loss": 0.8292974829673767, "step": 4237 }, { "epoch": 5.2, "grad_norm": 0.23489858210086823, "learning_rate": 4.379930417031484e-05, "loss": 0.8053081035614014, "step": 4238 }, { "epoch": 5.20122699386503, "grad_norm": 0.4835062026977539, "learning_rate": 4.379599518697444e-05, "loss": 0.7170343399047852, "step": 4239 }, { "epoch": 5.2024539877300615, "grad_norm": 0.2624718248844147, "learning_rate": 4.379268544602107e-05, "loss": 0.7932238578796387, "step": 4240 }, { "epoch": 5.203680981595092, "grad_norm": 0.25623854994773865, "learning_rate": 4.378937494758816e-05, "loss": 0.6942330598831177, "step": 4241 }, { "epoch": 5.204907975460123, "grad_norm": 0.2579643130302429, "learning_rate": 4.378606369180913e-05, "loss": 0.7655278444290161, "step": 4242 }, { "epoch": 5.206134969325153, "grad_norm": 0.2505952715873718, "learning_rate": 4.3782751678817445e-05, "loss": 0.8539177179336548, "step": 4243 }, { "epoch": 5.207361963190184, "grad_norm": 0.2259218543767929, "learning_rate": 4.3779438908746616e-05, "loss": 0.8215184211730957, "step": 4244 }, { "epoch": 5.208588957055214, "grad_norm": 0.2521761357784271, "learning_rate": 4.377612538173017e-05, "loss": 0.7805914878845215, "step": 4245 }, { "epoch": 5.2098159509202455, "grad_norm": 0.24084311723709106, "learning_rate": 4.377281109790166e-05, "loss": 0.8846621513366699, "step": 4246 }, { "epoch": 5.211042944785276, "grad_norm": 0.23734216392040253, "learning_rate": 4.3769496057394665e-05, "loss": 0.8023818731307983, "step": 4247 }, { "epoch": 5.212269938650307, "grad_norm": 0.250053346157074, "learning_rate": 4.376618026034283e-05, "loss": 0.6982192993164062, "step": 4248 }, { "epoch": 5.213496932515337, "grad_norm": 0.30269259214401245, "learning_rate": 4.376286370687978e-05, "loss": 0.5622333288192749, "step": 4249 }, { "epoch": 5.214723926380368, "grad_norm": 0.2603653073310852, "learning_rate": 4.3759546397139216e-05, "loss": 0.8094892501831055, "step": 4250 }, { "epoch": 5.215950920245398, "grad_norm": 0.2689723074436188, "learning_rate": 4.3756228331254836e-05, "loss": 0.8607370853424072, "step": 4251 }, { "epoch": 5.2171779141104295, "grad_norm": 0.21358248591423035, "learning_rate": 4.375290950936039e-05, "loss": 0.8785949945449829, "step": 4252 }, { "epoch": 5.21840490797546, "grad_norm": 0.2670961320400238, "learning_rate": 4.374958993158964e-05, "loss": 0.8877160549163818, "step": 4253 }, { "epoch": 5.219631901840491, "grad_norm": 0.23502665758132935, "learning_rate": 4.37462695980764e-05, "loss": 0.6141762137413025, "step": 4254 }, { "epoch": 5.220858895705521, "grad_norm": 0.2532339096069336, "learning_rate": 4.3742948508954494e-05, "loss": 0.7038302421569824, "step": 4255 }, { "epoch": 5.222085889570552, "grad_norm": 0.24362970888614655, "learning_rate": 4.3739626664357805e-05, "loss": 0.6992708444595337, "step": 4256 }, { "epoch": 5.223312883435582, "grad_norm": 0.2566893696784973, "learning_rate": 4.37363040644202e-05, "loss": 0.8021535873413086, "step": 4257 }, { "epoch": 5.2245398773006135, "grad_norm": 0.23963217437267303, "learning_rate": 4.373298070927562e-05, "loss": 0.9083724617958069, "step": 4258 }, { "epoch": 5.225766871165644, "grad_norm": 0.26442888379096985, "learning_rate": 4.372965659905801e-05, "loss": 0.5227694511413574, "step": 4259 }, { "epoch": 5.226993865030675, "grad_norm": 0.2345583587884903, "learning_rate": 4.372633173390136e-05, "loss": 0.7304799556732178, "step": 4260 }, { "epoch": 5.228220858895705, "grad_norm": 0.2906548082828522, "learning_rate": 4.372300611393969e-05, "loss": 0.6778143644332886, "step": 4261 }, { "epoch": 5.229447852760736, "grad_norm": 0.23017336428165436, "learning_rate": 4.371967973930705e-05, "loss": 0.6900676488876343, "step": 4262 }, { "epoch": 5.230674846625767, "grad_norm": 0.249323308467865, "learning_rate": 4.37163526101375e-05, "loss": 0.7887951135635376, "step": 4263 }, { "epoch": 5.2319018404907975, "grad_norm": 0.2377883344888687, "learning_rate": 4.371302472656517e-05, "loss": 0.8347145318984985, "step": 4264 }, { "epoch": 5.233128834355828, "grad_norm": 0.2285655438899994, "learning_rate": 4.3709696088724175e-05, "loss": 0.8881438970565796, "step": 4265 }, { "epoch": 5.234355828220859, "grad_norm": 0.2601989209651947, "learning_rate": 4.370636669674869e-05, "loss": 0.6065865159034729, "step": 4266 }, { "epoch": 5.23558282208589, "grad_norm": 0.21222853660583496, "learning_rate": 4.370303655077291e-05, "loss": 0.8828884363174438, "step": 4267 }, { "epoch": 5.23680981595092, "grad_norm": 0.22559188306331635, "learning_rate": 4.369970565093108e-05, "loss": 0.7141213417053223, "step": 4268 }, { "epoch": 5.238036809815951, "grad_norm": 0.2250756025314331, "learning_rate": 4.369637399735745e-05, "loss": 1.072713017463684, "step": 4269 }, { "epoch": 5.2392638036809815, "grad_norm": 0.22108221054077148, "learning_rate": 4.369304159018631e-05, "loss": 0.8887827396392822, "step": 4270 }, { "epoch": 5.240490797546013, "grad_norm": 0.2800784111022949, "learning_rate": 4.368970842955197e-05, "loss": 0.6980715990066528, "step": 4271 }, { "epoch": 5.241717791411043, "grad_norm": 0.26000502705574036, "learning_rate": 4.368637451558879e-05, "loss": 0.7821187973022461, "step": 4272 }, { "epoch": 5.242944785276074, "grad_norm": 0.23665562272071838, "learning_rate": 4.368303984843115e-05, "loss": 0.8018335103988647, "step": 4273 }, { "epoch": 5.244171779141104, "grad_norm": 0.2373228222131729, "learning_rate": 4.367970442821345e-05, "loss": 0.7715905904769897, "step": 4274 }, { "epoch": 5.245398773006135, "grad_norm": 0.23925429582595825, "learning_rate": 4.3676368255070155e-05, "loss": 0.6657741069793701, "step": 4275 }, { "epoch": 5.2466257668711656, "grad_norm": 0.2153216153383255, "learning_rate": 4.367303132913572e-05, "loss": 0.8177608251571655, "step": 4276 }, { "epoch": 5.247852760736197, "grad_norm": 0.2771245539188385, "learning_rate": 4.366969365054464e-05, "loss": 0.7025607824325562, "step": 4277 }, { "epoch": 5.249079754601227, "grad_norm": 0.33571258187294006, "learning_rate": 4.3666355219431464e-05, "loss": 0.6545820236206055, "step": 4278 }, { "epoch": 5.250306748466258, "grad_norm": 0.2008379101753235, "learning_rate": 4.366301603593074e-05, "loss": 0.965573787689209, "step": 4279 }, { "epoch": 5.251533742331288, "grad_norm": 0.2119438201189041, "learning_rate": 4.3659676100177075e-05, "loss": 0.9179338216781616, "step": 4280 }, { "epoch": 5.252760736196319, "grad_norm": 0.3445943593978882, "learning_rate": 4.365633541230509e-05, "loss": 0.5739071369171143, "step": 4281 }, { "epoch": 5.25398773006135, "grad_norm": 0.23845568299293518, "learning_rate": 4.365299397244943e-05, "loss": 0.8506755232810974, "step": 4282 }, { "epoch": 5.255214723926381, "grad_norm": 0.2720343768596649, "learning_rate": 4.3649651780744775e-05, "loss": 0.6347378492355347, "step": 4283 }, { "epoch": 5.256441717791411, "grad_norm": 0.2519414722919464, "learning_rate": 4.364630883732585e-05, "loss": 0.8328059315681458, "step": 4284 }, { "epoch": 5.257668711656442, "grad_norm": 0.28624990582466125, "learning_rate": 4.36429651423274e-05, "loss": 0.7349112033843994, "step": 4285 }, { "epoch": 5.258895705521472, "grad_norm": 0.2525469660758972, "learning_rate": 4.3639620695884196e-05, "loss": 0.6280244588851929, "step": 4286 }, { "epoch": 5.260122699386503, "grad_norm": 0.2899473309516907, "learning_rate": 4.363627549813105e-05, "loss": 0.768886387348175, "step": 4287 }, { "epoch": 5.261349693251534, "grad_norm": 0.23460009694099426, "learning_rate": 4.3632929549202786e-05, "loss": 0.9265852570533752, "step": 4288 }, { "epoch": 5.262576687116565, "grad_norm": 0.22498977184295654, "learning_rate": 4.3629582849234275e-05, "loss": 0.8123661875724792, "step": 4289 }, { "epoch": 5.263803680981595, "grad_norm": 0.2738339602947235, "learning_rate": 4.36262353983604e-05, "loss": 0.7545895576477051, "step": 4290 }, { "epoch": 5.265030674846626, "grad_norm": 0.2783018946647644, "learning_rate": 4.3622887196716114e-05, "loss": 0.7336950302124023, "step": 4291 }, { "epoch": 5.266257668711656, "grad_norm": 0.2557961046695709, "learning_rate": 4.361953824443636e-05, "loss": 0.7775977849960327, "step": 4292 }, { "epoch": 5.267484662576687, "grad_norm": 0.24700041115283966, "learning_rate": 4.361618854165612e-05, "loss": 0.7014961838722229, "step": 4293 }, { "epoch": 5.268711656441718, "grad_norm": 0.2531028389930725, "learning_rate": 4.3612838088510424e-05, "loss": 0.7185565233230591, "step": 4294 }, { "epoch": 5.269938650306749, "grad_norm": 0.2154957503080368, "learning_rate": 4.360948688513429e-05, "loss": 0.733773946762085, "step": 4295 }, { "epoch": 5.271165644171779, "grad_norm": 0.2822190523147583, "learning_rate": 4.360613493166283e-05, "loss": 0.6647047996520996, "step": 4296 }, { "epoch": 5.27239263803681, "grad_norm": 0.29018619656562805, "learning_rate": 4.360278222823113e-05, "loss": 0.6371022462844849, "step": 4297 }, { "epoch": 5.27361963190184, "grad_norm": 0.24275068938732147, "learning_rate": 4.359942877497434e-05, "loss": 0.9180781841278076, "step": 4298 }, { "epoch": 5.274846625766871, "grad_norm": 0.26317298412323, "learning_rate": 4.3596074572027624e-05, "loss": 0.7940413951873779, "step": 4299 }, { "epoch": 5.276073619631902, "grad_norm": 0.2628194987773895, "learning_rate": 4.359271961952618e-05, "loss": 0.7161688804626465, "step": 4300 }, { "epoch": 5.277300613496933, "grad_norm": 0.2516510784626007, "learning_rate": 4.358936391760524e-05, "loss": 0.6929717063903809, "step": 4301 }, { "epoch": 5.278527607361963, "grad_norm": 0.2784092128276825, "learning_rate": 4.358600746640005e-05, "loss": 0.8135795593261719, "step": 4302 }, { "epoch": 5.279754601226994, "grad_norm": 0.26035574078559875, "learning_rate": 4.358265026604591e-05, "loss": 0.6848307847976685, "step": 4303 }, { "epoch": 5.280981595092024, "grad_norm": 0.27590110898017883, "learning_rate": 4.357929231667814e-05, "loss": 0.6316980123519897, "step": 4304 }, { "epoch": 5.282208588957055, "grad_norm": 0.294190913438797, "learning_rate": 4.3575933618432085e-05, "loss": 0.9295522570610046, "step": 4305 }, { "epoch": 5.283435582822086, "grad_norm": 0.24312329292297363, "learning_rate": 4.357257417144313e-05, "loss": 0.6983933448791504, "step": 4306 }, { "epoch": 5.284662576687117, "grad_norm": 0.19770610332489014, "learning_rate": 4.356921397584667e-05, "loss": 0.8632583618164062, "step": 4307 }, { "epoch": 5.285889570552147, "grad_norm": 0.258804589509964, "learning_rate": 4.356585303177816e-05, "loss": 0.8931772708892822, "step": 4308 }, { "epoch": 5.287116564417178, "grad_norm": 0.2391701191663742, "learning_rate": 4.356249133937307e-05, "loss": 0.8418111801147461, "step": 4309 }, { "epoch": 5.288343558282208, "grad_norm": 0.24452313780784607, "learning_rate": 4.35591288987669e-05, "loss": 0.7242568731307983, "step": 4310 }, { "epoch": 5.289570552147239, "grad_norm": 0.20747777819633484, "learning_rate": 4.355576571009517e-05, "loss": 1.036715030670166, "step": 4311 }, { "epoch": 5.29079754601227, "grad_norm": 0.2315981686115265, "learning_rate": 4.355240177349345e-05, "loss": 0.9310296773910522, "step": 4312 }, { "epoch": 5.292024539877301, "grad_norm": 0.24469657242298126, "learning_rate": 4.354903708909732e-05, "loss": 0.7286678552627563, "step": 4313 }, { "epoch": 5.293251533742331, "grad_norm": 0.24754023551940918, "learning_rate": 4.354567165704241e-05, "loss": 0.8429413437843323, "step": 4314 }, { "epoch": 5.294478527607362, "grad_norm": 0.23570391535758972, "learning_rate": 4.354230547746438e-05, "loss": 0.7473578453063965, "step": 4315 }, { "epoch": 5.295705521472392, "grad_norm": 0.22128990292549133, "learning_rate": 4.353893855049889e-05, "loss": 0.7651873826980591, "step": 4316 }, { "epoch": 5.296932515337423, "grad_norm": 0.22802767157554626, "learning_rate": 4.3535570876281674e-05, "loss": 0.7956124544143677, "step": 4317 }, { "epoch": 5.298159509202454, "grad_norm": 0.27476271986961365, "learning_rate": 4.3532202454948445e-05, "loss": 0.8514007329940796, "step": 4318 }, { "epoch": 5.299386503067485, "grad_norm": 0.233178049325943, "learning_rate": 4.352883328663499e-05, "loss": 0.6834339499473572, "step": 4319 }, { "epoch": 5.300613496932515, "grad_norm": 0.1773555725812912, "learning_rate": 4.352546337147713e-05, "loss": 0.9165624380111694, "step": 4320 }, { "epoch": 5.301840490797546, "grad_norm": 0.2246849536895752, "learning_rate": 4.352209270961066e-05, "loss": 0.7888034582138062, "step": 4321 }, { "epoch": 5.303067484662577, "grad_norm": 0.19993403553962708, "learning_rate": 4.351872130117147e-05, "loss": 0.6797897815704346, "step": 4322 }, { "epoch": 5.304294478527607, "grad_norm": 0.1909944862127304, "learning_rate": 4.351534914629544e-05, "loss": 0.7248971462249756, "step": 4323 }, { "epoch": 5.305521472392638, "grad_norm": 0.2425612360239029, "learning_rate": 4.351197624511849e-05, "loss": 0.7001065015792847, "step": 4324 }, { "epoch": 5.306748466257669, "grad_norm": 0.20733806490898132, "learning_rate": 4.350860259777658e-05, "loss": 0.730293869972229, "step": 4325 }, { "epoch": 5.3079754601227, "grad_norm": 0.23941121995449066, "learning_rate": 4.350522820440568e-05, "loss": 0.9619107246398926, "step": 4326 }, { "epoch": 5.30920245398773, "grad_norm": 0.248613640666008, "learning_rate": 4.350185306514182e-05, "loss": 0.7477299571037292, "step": 4327 }, { "epoch": 5.31042944785276, "grad_norm": 0.21099545061588287, "learning_rate": 4.349847718012102e-05, "loss": 0.7468663454055786, "step": 4328 }, { "epoch": 5.311656441717791, "grad_norm": 0.2436496466398239, "learning_rate": 4.349510054947937e-05, "loss": 0.7507567405700684, "step": 4329 }, { "epoch": 5.3128834355828225, "grad_norm": 0.3042985200881958, "learning_rate": 4.349172317335297e-05, "loss": 0.6554810404777527, "step": 4330 }, { "epoch": 5.314110429447853, "grad_norm": 0.24361774325370789, "learning_rate": 4.348834505187794e-05, "loss": 0.7540839314460754, "step": 4331 }, { "epoch": 5.315337423312884, "grad_norm": 0.18614819645881653, "learning_rate": 4.3484966185190454e-05, "loss": 0.8893031477928162, "step": 4332 }, { "epoch": 5.316564417177914, "grad_norm": 0.18243587017059326, "learning_rate": 4.3481586573426705e-05, "loss": 0.9803104996681213, "step": 4333 }, { "epoch": 5.317791411042945, "grad_norm": 0.28907740116119385, "learning_rate": 4.347820621672292e-05, "loss": 0.8222448229789734, "step": 4334 }, { "epoch": 5.319018404907975, "grad_norm": 0.22200456261634827, "learning_rate": 4.347482511521533e-05, "loss": 0.8995339870452881, "step": 4335 }, { "epoch": 5.3202453987730065, "grad_norm": 0.2191486954689026, "learning_rate": 4.347144326904023e-05, "loss": 0.6566038727760315, "step": 4336 }, { "epoch": 5.321472392638037, "grad_norm": 0.207849383354187, "learning_rate": 4.346806067833395e-05, "loss": 0.7450764179229736, "step": 4337 }, { "epoch": 5.322699386503068, "grad_norm": 0.2564043700695038, "learning_rate": 4.346467734323281e-05, "loss": 0.8079949617385864, "step": 4338 }, { "epoch": 5.323926380368098, "grad_norm": 0.27152594923973083, "learning_rate": 4.346129326387318e-05, "loss": 0.7505723237991333, "step": 4339 }, { "epoch": 5.325153374233129, "grad_norm": 0.25252845883369446, "learning_rate": 4.345790844039148e-05, "loss": 0.902949333190918, "step": 4340 }, { "epoch": 5.326380368098159, "grad_norm": 0.22442752122879028, "learning_rate": 4.345452287292414e-05, "loss": 0.8511807918548584, "step": 4341 }, { "epoch": 5.3276073619631905, "grad_norm": 0.2301509529352188, "learning_rate": 4.345113656160761e-05, "loss": 0.7423155307769775, "step": 4342 }, { "epoch": 5.328834355828221, "grad_norm": 0.346759170293808, "learning_rate": 4.344774950657838e-05, "loss": 0.5726842880249023, "step": 4343 }, { "epoch": 5.330061349693252, "grad_norm": 0.2137441784143448, "learning_rate": 4.3444361707972994e-05, "loss": 0.7486433982849121, "step": 4344 }, { "epoch": 5.331288343558282, "grad_norm": 0.2659221887588501, "learning_rate": 4.3440973165927994e-05, "loss": 0.620209813117981, "step": 4345 }, { "epoch": 5.332515337423313, "grad_norm": 0.2528659999370575, "learning_rate": 4.343758388057996e-05, "loss": 0.7828446626663208, "step": 4346 }, { "epoch": 5.333742331288343, "grad_norm": 0.19760969281196594, "learning_rate": 4.3434193852065494e-05, "loss": 0.7737514972686768, "step": 4347 }, { "epoch": 5.3349693251533745, "grad_norm": 0.23607730865478516, "learning_rate": 4.343080308052126e-05, "loss": 0.7075526714324951, "step": 4348 }, { "epoch": 5.336196319018405, "grad_norm": 0.27153387665748596, "learning_rate": 4.3427411566083917e-05, "loss": 0.7292568683624268, "step": 4349 }, { "epoch": 5.337423312883436, "grad_norm": 0.22501124441623688, "learning_rate": 4.3424019308890176e-05, "loss": 0.9017317295074463, "step": 4350 }, { "epoch": 5.338650306748466, "grad_norm": 0.19334830343723297, "learning_rate": 4.342062630907676e-05, "loss": 0.8604972958564758, "step": 4351 }, { "epoch": 5.339877300613497, "grad_norm": 0.23035509884357452, "learning_rate": 4.341723256678043e-05, "loss": 0.7101790904998779, "step": 4352 }, { "epoch": 5.341104294478527, "grad_norm": 0.23589175939559937, "learning_rate": 4.341383808213799e-05, "loss": 0.7019640207290649, "step": 4353 }, { "epoch": 5.3423312883435585, "grad_norm": 0.21360719203948975, "learning_rate": 4.341044285528625e-05, "loss": 0.8859604597091675, "step": 4354 }, { "epoch": 5.343558282208589, "grad_norm": 0.1829133778810501, "learning_rate": 4.340704688636207e-05, "loss": 0.9726088047027588, "step": 4355 }, { "epoch": 5.34478527607362, "grad_norm": 0.2565286159515381, "learning_rate": 4.340365017550233e-05, "loss": 0.655036985874176, "step": 4356 }, { "epoch": 5.34601226993865, "grad_norm": 0.2322549670934677, "learning_rate": 4.340025272284394e-05, "loss": 0.8725348114967346, "step": 4357 }, { "epoch": 5.347239263803681, "grad_norm": 0.2631027400493622, "learning_rate": 4.339685452852383e-05, "loss": 0.6384942531585693, "step": 4358 }, { "epoch": 5.348466257668711, "grad_norm": 0.24987007677555084, "learning_rate": 4.3393455592679e-05, "loss": 0.6987035274505615, "step": 4359 }, { "epoch": 5.3496932515337425, "grad_norm": 0.23073774576187134, "learning_rate": 4.339005591544642e-05, "loss": 0.7798694968223572, "step": 4360 }, { "epoch": 5.350920245398773, "grad_norm": 0.24810157716274261, "learning_rate": 4.338665549696315e-05, "loss": 0.8857898116111755, "step": 4361 }, { "epoch": 5.352147239263804, "grad_norm": 0.25657719373703003, "learning_rate": 4.338325433736623e-05, "loss": 0.7236741185188293, "step": 4362 }, { "epoch": 5.353374233128834, "grad_norm": 0.2796410322189331, "learning_rate": 4.3379852436792756e-05, "loss": 0.6504006385803223, "step": 4363 }, { "epoch": 5.354601226993865, "grad_norm": 0.27900534868240356, "learning_rate": 4.3376449795379856e-05, "loss": 0.5213396549224854, "step": 4364 }, { "epoch": 5.355828220858895, "grad_norm": 0.23108886182308197, "learning_rate": 4.337304641326467e-05, "loss": 0.863433837890625, "step": 4365 }, { "epoch": 5.3570552147239265, "grad_norm": 0.259587824344635, "learning_rate": 4.33696422905844e-05, "loss": 0.7114713788032532, "step": 4366 }, { "epoch": 5.358282208588957, "grad_norm": 0.20148251950740814, "learning_rate": 4.336623742747623e-05, "loss": 0.6380473375320435, "step": 4367 }, { "epoch": 5.359509202453988, "grad_norm": 0.22296473383903503, "learning_rate": 4.336283182407741e-05, "loss": 0.8035191893577576, "step": 4368 }, { "epoch": 5.360736196319018, "grad_norm": 0.28820928931236267, "learning_rate": 4.335942548052521e-05, "loss": 0.528359055519104, "step": 4369 }, { "epoch": 5.361963190184049, "grad_norm": 0.2274750918149948, "learning_rate": 4.3356018396956934e-05, "loss": 0.7408064603805542, "step": 4370 }, { "epoch": 5.363190184049079, "grad_norm": 0.2529030442237854, "learning_rate": 4.335261057350991e-05, "loss": 0.7211833000183105, "step": 4371 }, { "epoch": 5.3644171779141105, "grad_norm": 0.22426344454288483, "learning_rate": 4.33492020103215e-05, "loss": 0.8385266661643982, "step": 4372 }, { "epoch": 5.365644171779141, "grad_norm": 0.27231019735336304, "learning_rate": 4.3345792707529086e-05, "loss": 0.7483364939689636, "step": 4373 }, { "epoch": 5.366871165644172, "grad_norm": 0.2617359459400177, "learning_rate": 4.3342382665270096e-05, "loss": 0.7601895332336426, "step": 4374 }, { "epoch": 5.368098159509202, "grad_norm": 0.264931321144104, "learning_rate": 4.333897188368197e-05, "loss": 0.7428712248802185, "step": 4375 }, { "epoch": 5.369325153374233, "grad_norm": 0.2999555468559265, "learning_rate": 4.3335560362902196e-05, "loss": 0.6539457440376282, "step": 4376 }, { "epoch": 5.370552147239263, "grad_norm": 0.28103992342948914, "learning_rate": 4.333214810306828e-05, "loss": 0.6602308750152588, "step": 4377 }, { "epoch": 5.3717791411042946, "grad_norm": 0.23418261110782623, "learning_rate": 4.332873510431775e-05, "loss": 0.8039629459381104, "step": 4378 }, { "epoch": 5.373006134969325, "grad_norm": 0.32885047793388367, "learning_rate": 4.332532136678819e-05, "loss": 0.6362907886505127, "step": 4379 }, { "epoch": 5.374233128834356, "grad_norm": 0.20472921431064606, "learning_rate": 4.332190689061719e-05, "loss": 0.7839831113815308, "step": 4380 }, { "epoch": 5.375460122699386, "grad_norm": 0.26709088683128357, "learning_rate": 4.3318491675942386e-05, "loss": 0.6496373414993286, "step": 4381 }, { "epoch": 5.376687116564417, "grad_norm": 0.23530611395835876, "learning_rate": 4.3315075722901414e-05, "loss": 0.921393632888794, "step": 4382 }, { "epoch": 5.3779141104294474, "grad_norm": 0.22680948674678802, "learning_rate": 4.331165903163199e-05, "loss": 0.9605308771133423, "step": 4383 }, { "epoch": 5.379141104294479, "grad_norm": 0.2578519582748413, "learning_rate": 4.3308241602271805e-05, "loss": 0.6088932752609253, "step": 4384 }, { "epoch": 5.38036809815951, "grad_norm": 0.23631948232650757, "learning_rate": 4.3304823434958624e-05, "loss": 0.8953280448913574, "step": 4385 }, { "epoch": 5.38159509202454, "grad_norm": 0.23804421722888947, "learning_rate": 4.330140452983021e-05, "loss": 0.7833206653594971, "step": 4386 }, { "epoch": 5.38282208588957, "grad_norm": 0.22904491424560547, "learning_rate": 4.329798488702439e-05, "loss": 0.7295417785644531, "step": 4387 }, { "epoch": 5.384049079754601, "grad_norm": 0.27658262848854065, "learning_rate": 4.329456450667898e-05, "loss": 0.6256604790687561, "step": 4388 }, { "epoch": 5.385276073619632, "grad_norm": 0.210673987865448, "learning_rate": 4.3291143388931855e-05, "loss": 0.6550869941711426, "step": 4389 }, { "epoch": 5.386503067484663, "grad_norm": 0.245602548122406, "learning_rate": 4.32877215339209e-05, "loss": 0.6454447507858276, "step": 4390 }, { "epoch": 5.387730061349693, "grad_norm": 0.25208547711372375, "learning_rate": 4.3284298941784055e-05, "loss": 0.6463543176651001, "step": 4391 }, { "epoch": 5.388957055214724, "grad_norm": 0.2512560486793518, "learning_rate": 4.328087561265927e-05, "loss": 0.7371105551719666, "step": 4392 }, { "epoch": 5.390184049079755, "grad_norm": 0.20919330418109894, "learning_rate": 4.327745154668452e-05, "loss": 0.802272617816925, "step": 4393 }, { "epoch": 5.391411042944785, "grad_norm": 0.22055774927139282, "learning_rate": 4.327402674399783e-05, "loss": 0.8423931002616882, "step": 4394 }, { "epoch": 5.392638036809816, "grad_norm": 0.24167756736278534, "learning_rate": 4.327060120473725e-05, "loss": 0.6259632706642151, "step": 4395 }, { "epoch": 5.393865030674847, "grad_norm": 0.34556400775909424, "learning_rate": 4.3267174929040834e-05, "loss": 0.5453872084617615, "step": 4396 }, { "epoch": 5.395092024539878, "grad_norm": 0.21719937026500702, "learning_rate": 4.32637479170467e-05, "loss": 0.7305476069450378, "step": 4397 }, { "epoch": 5.396319018404908, "grad_norm": 0.23471395671367645, "learning_rate": 4.3260320168892974e-05, "loss": 0.8385615348815918, "step": 4398 }, { "epoch": 5.397546012269939, "grad_norm": 0.2492780089378357, "learning_rate": 4.3256891684717824e-05, "loss": 0.764629602432251, "step": 4399 }, { "epoch": 5.398773006134969, "grad_norm": 0.25387388467788696, "learning_rate": 4.3253462464659443e-05, "loss": 0.7061812877655029, "step": 4400 }, { "epoch": 5.4, "grad_norm": 0.2506404519081116, "learning_rate": 4.325003250885605e-05, "loss": 0.7634019255638123, "step": 4401 }, { "epoch": 5.401226993865031, "grad_norm": 0.2281194031238556, "learning_rate": 4.324660181744589e-05, "loss": 0.7654635906219482, "step": 4402 }, { "epoch": 5.402453987730062, "grad_norm": 0.2600998282432556, "learning_rate": 4.324317039056726e-05, "loss": 0.7121874094009399, "step": 4403 }, { "epoch": 5.403680981595092, "grad_norm": 0.20874956250190735, "learning_rate": 4.323973822835846e-05, "loss": 0.6679608821868896, "step": 4404 }, { "epoch": 5.404907975460123, "grad_norm": 0.2543136775493622, "learning_rate": 4.323630533095784e-05, "loss": 0.7890629768371582, "step": 4405 }, { "epoch": 5.406134969325153, "grad_norm": 0.2483919858932495, "learning_rate": 4.323287169850375e-05, "loss": 0.696753978729248, "step": 4406 }, { "epoch": 5.407361963190184, "grad_norm": 0.2690141797065735, "learning_rate": 4.322943733113461e-05, "loss": 0.6233193874359131, "step": 4407 }, { "epoch": 5.408588957055215, "grad_norm": 0.2288902848958969, "learning_rate": 4.322600222898885e-05, "loss": 0.767943263053894, "step": 4408 }, { "epoch": 5.409815950920246, "grad_norm": 0.279103547334671, "learning_rate": 4.3222566392204914e-05, "loss": 1.0543606281280518, "step": 4409 }, { "epoch": 5.411042944785276, "grad_norm": 0.2549223005771637, "learning_rate": 4.32191298209213e-05, "loss": 0.6679888963699341, "step": 4410 }, { "epoch": 5.412269938650307, "grad_norm": 0.21676820516586304, "learning_rate": 4.321569251527653e-05, "loss": 0.8458619117736816, "step": 4411 }, { "epoch": 5.413496932515337, "grad_norm": 0.32631364464759827, "learning_rate": 4.321225447540914e-05, "loss": 0.7086572051048279, "step": 4412 }, { "epoch": 5.414723926380368, "grad_norm": 0.2222387194633484, "learning_rate": 4.3208815701457724e-05, "loss": 0.9893773794174194, "step": 4413 }, { "epoch": 5.415950920245399, "grad_norm": 0.2531156837940216, "learning_rate": 4.320537619356088e-05, "loss": 0.8118014335632324, "step": 4414 }, { "epoch": 5.41717791411043, "grad_norm": 0.3421514928340912, "learning_rate": 4.3201935951857245e-05, "loss": 0.5626120567321777, "step": 4415 }, { "epoch": 5.41840490797546, "grad_norm": 0.3101438581943512, "learning_rate": 4.3198494976485474e-05, "loss": 0.6662193536758423, "step": 4416 }, { "epoch": 5.419631901840491, "grad_norm": 0.23859690129756927, "learning_rate": 4.319505326758428e-05, "loss": 0.7306616306304932, "step": 4417 }, { "epoch": 5.420858895705521, "grad_norm": 0.3137611448764801, "learning_rate": 4.3191610825292385e-05, "loss": 0.8038046360015869, "step": 4418 }, { "epoch": 5.422085889570552, "grad_norm": 0.24066448211669922, "learning_rate": 4.318816764974854e-05, "loss": 0.7059600353240967, "step": 4419 }, { "epoch": 5.423312883435583, "grad_norm": 0.22722499072551727, "learning_rate": 4.318472374109154e-05, "loss": 0.9192348718643188, "step": 4420 }, { "epoch": 5.424539877300614, "grad_norm": 0.263892263174057, "learning_rate": 4.318127909946018e-05, "loss": 0.5450785160064697, "step": 4421 }, { "epoch": 5.425766871165644, "grad_norm": 0.21395957469940186, "learning_rate": 4.3177833724993317e-05, "loss": 0.6529217958450317, "step": 4422 }, { "epoch": 5.426993865030675, "grad_norm": 0.20116521418094635, "learning_rate": 4.317438761782982e-05, "loss": 1.0430572032928467, "step": 4423 }, { "epoch": 5.428220858895705, "grad_norm": 0.24740950763225555, "learning_rate": 4.3170940778108595e-05, "loss": 0.5964787006378174, "step": 4424 }, { "epoch": 5.429447852760736, "grad_norm": 0.23653645813465118, "learning_rate": 4.316749320596857e-05, "loss": 0.7744765281677246, "step": 4425 }, { "epoch": 5.430674846625767, "grad_norm": 0.3160354495048523, "learning_rate": 4.316404490154871e-05, "loss": 0.5503638386726379, "step": 4426 }, { "epoch": 5.431901840490798, "grad_norm": 0.23655904829502106, "learning_rate": 4.316059586498801e-05, "loss": 0.7026976943016052, "step": 4427 }, { "epoch": 5.433128834355828, "grad_norm": 0.33792033791542053, "learning_rate": 4.3157146096425485e-05, "loss": 0.6364575624465942, "step": 4428 }, { "epoch": 5.434355828220859, "grad_norm": 0.23701831698417664, "learning_rate": 4.315369559600018e-05, "loss": 0.876522958278656, "step": 4429 }, { "epoch": 5.435582822085889, "grad_norm": 0.2695426344871521, "learning_rate": 4.315024436385118e-05, "loss": 0.6052594184875488, "step": 4430 }, { "epoch": 5.43680981595092, "grad_norm": 0.23311224579811096, "learning_rate": 4.3146792400117605e-05, "loss": 0.7279994487762451, "step": 4431 }, { "epoch": 5.438036809815951, "grad_norm": 0.2555212378501892, "learning_rate": 4.314333970493859e-05, "loss": 0.6933119893074036, "step": 4432 }, { "epoch": 5.439263803680982, "grad_norm": 0.230583056807518, "learning_rate": 4.3139886278453284e-05, "loss": 0.9920140504837036, "step": 4433 }, { "epoch": 5.440490797546012, "grad_norm": 0.2661970555782318, "learning_rate": 4.31364321208009e-05, "loss": 0.7628473043441772, "step": 4434 }, { "epoch": 5.441717791411043, "grad_norm": 0.2457614690065384, "learning_rate": 4.313297723212067e-05, "loss": 0.8537822961807251, "step": 4435 }, { "epoch": 5.442944785276073, "grad_norm": 0.29394614696502686, "learning_rate": 4.3129521612551844e-05, "loss": 0.6762909889221191, "step": 4436 }, { "epoch": 5.444171779141104, "grad_norm": 0.28196585178375244, "learning_rate": 4.312606526223371e-05, "loss": 0.6687684059143066, "step": 4437 }, { "epoch": 5.445398773006135, "grad_norm": 0.2039378136396408, "learning_rate": 4.312260818130558e-05, "loss": 0.7899062037467957, "step": 4438 }, { "epoch": 5.446625766871166, "grad_norm": 0.3039216995239258, "learning_rate": 4.3119150369906805e-05, "loss": 0.6528056263923645, "step": 4439 }, { "epoch": 5.447852760736196, "grad_norm": 0.21718059480190277, "learning_rate": 4.311569182817674e-05, "loss": 0.7130266427993774, "step": 4440 }, { "epoch": 5.449079754601227, "grad_norm": 0.2982368767261505, "learning_rate": 4.3112232556254815e-05, "loss": 0.7269096374511719, "step": 4441 }, { "epoch": 5.450306748466257, "grad_norm": 0.26634764671325684, "learning_rate": 4.3108772554280464e-05, "loss": 0.7524040937423706, "step": 4442 }, { "epoch": 5.451533742331288, "grad_norm": 0.2515278160572052, "learning_rate": 4.310531182239313e-05, "loss": 0.6554889678955078, "step": 4443 }, { "epoch": 5.452760736196319, "grad_norm": 0.1970566064119339, "learning_rate": 4.3101850360732316e-05, "loss": 0.8973913192749023, "step": 4444 }, { "epoch": 5.45398773006135, "grad_norm": 0.2239060252904892, "learning_rate": 4.3098388169437545e-05, "loss": 0.9037997126579285, "step": 4445 }, { "epoch": 5.45521472392638, "grad_norm": 0.24685706198215485, "learning_rate": 4.309492524864835e-05, "loss": 0.6889424324035645, "step": 4446 }, { "epoch": 5.456441717791411, "grad_norm": 0.227206289768219, "learning_rate": 4.3091461598504345e-05, "loss": 0.8959342241287231, "step": 4447 }, { "epoch": 5.457668711656442, "grad_norm": 0.3270144462585449, "learning_rate": 4.308799721914511e-05, "loss": 0.7470697164535522, "step": 4448 }, { "epoch": 5.458895705521472, "grad_norm": 0.22585642337799072, "learning_rate": 4.308453211071031e-05, "loss": 0.8902264833450317, "step": 4449 }, { "epoch": 5.460122699386503, "grad_norm": 0.25638601183891296, "learning_rate": 4.308106627333959e-05, "loss": 0.7557821273803711, "step": 4450 }, { "epoch": 5.461349693251534, "grad_norm": 0.2581453025341034, "learning_rate": 4.307759970717265e-05, "loss": 0.8089407682418823, "step": 4451 }, { "epoch": 5.462576687116565, "grad_norm": 0.24486802518367767, "learning_rate": 4.3074132412349234e-05, "loss": 0.933725118637085, "step": 4452 }, { "epoch": 5.463803680981595, "grad_norm": 0.25386011600494385, "learning_rate": 4.3070664389009086e-05, "loss": 0.7888078689575195, "step": 4453 }, { "epoch": 5.465030674846625, "grad_norm": 0.24465511739253998, "learning_rate": 4.306719563729199e-05, "loss": 0.7878260612487793, "step": 4454 }, { "epoch": 5.466257668711656, "grad_norm": 0.26791709661483765, "learning_rate": 4.306372615733778e-05, "loss": 0.7973357439041138, "step": 4455 }, { "epoch": 5.4674846625766875, "grad_norm": 0.25199347734451294, "learning_rate": 4.306025594928628e-05, "loss": 0.7344328165054321, "step": 4456 }, { "epoch": 5.468711656441718, "grad_norm": 0.2788715064525604, "learning_rate": 4.305678501327737e-05, "loss": 0.8603441715240479, "step": 4457 }, { "epoch": 5.469938650306749, "grad_norm": 0.24065424501895905, "learning_rate": 4.305331334945096e-05, "loss": 0.6895469427108765, "step": 4458 }, { "epoch": 5.471165644171779, "grad_norm": 0.2558625638484955, "learning_rate": 4.304984095794698e-05, "loss": 0.6796836256980896, "step": 4459 }, { "epoch": 5.47239263803681, "grad_norm": 0.23799237608909607, "learning_rate": 4.3046367838905386e-05, "loss": 0.653206467628479, "step": 4460 }, { "epoch": 5.47361963190184, "grad_norm": 0.21150310337543488, "learning_rate": 4.304289399246618e-05, "loss": 0.6953675746917725, "step": 4461 }, { "epoch": 5.4748466257668715, "grad_norm": 0.24304449558258057, "learning_rate": 4.303941941876938e-05, "loss": 0.8698437213897705, "step": 4462 }, { "epoch": 5.476073619631902, "grad_norm": 0.22250816226005554, "learning_rate": 4.3035944117955016e-05, "loss": 0.8312164545059204, "step": 4463 }, { "epoch": 5.477300613496933, "grad_norm": 0.24464358389377594, "learning_rate": 4.3032468090163206e-05, "loss": 0.7593538165092468, "step": 4464 }, { "epoch": 5.478527607361963, "grad_norm": 0.23251837491989136, "learning_rate": 4.3028991335534016e-05, "loss": 0.7569552659988403, "step": 4465 }, { "epoch": 5.479754601226994, "grad_norm": 0.24970854818820953, "learning_rate": 4.302551385420762e-05, "loss": 0.7749600410461426, "step": 4466 }, { "epoch": 5.480981595092024, "grad_norm": 0.25503382086753845, "learning_rate": 4.302203564632418e-05, "loss": 0.714356005191803, "step": 4467 }, { "epoch": 5.4822085889570555, "grad_norm": 0.21100834012031555, "learning_rate": 4.301855671202386e-05, "loss": 0.6869983673095703, "step": 4468 }, { "epoch": 5.483435582822086, "grad_norm": 0.23752674460411072, "learning_rate": 4.301507705144693e-05, "loss": 0.7238780856132507, "step": 4469 }, { "epoch": 5.484662576687117, "grad_norm": 0.21495501697063446, "learning_rate": 4.301159666473361e-05, "loss": 0.7313307523727417, "step": 4470 }, { "epoch": 5.485889570552147, "grad_norm": 0.2729959189891815, "learning_rate": 4.3008115552024206e-05, "loss": 0.7618892192840576, "step": 4471 }, { "epoch": 5.487116564417178, "grad_norm": 0.19228816032409668, "learning_rate": 4.3004633713459025e-05, "loss": 0.9105646014213562, "step": 4472 }, { "epoch": 5.488343558282208, "grad_norm": 0.21212945878505707, "learning_rate": 4.300115114917841e-05, "loss": 0.8634077906608582, "step": 4473 }, { "epoch": 5.4895705521472395, "grad_norm": 0.24979135394096375, "learning_rate": 4.299766785932273e-05, "loss": 0.7773393392562866, "step": 4474 }, { "epoch": 5.49079754601227, "grad_norm": 0.2621723711490631, "learning_rate": 4.2994183844032387e-05, "loss": 0.7783814668655396, "step": 4475 }, { "epoch": 5.492024539877301, "grad_norm": 0.25343453884124756, "learning_rate": 4.299069910344782e-05, "loss": 0.5760062336921692, "step": 4476 }, { "epoch": 5.493251533742331, "grad_norm": 0.2574551999568939, "learning_rate": 4.2987213637709486e-05, "loss": 0.8022688627243042, "step": 4477 }, { "epoch": 5.494478527607362, "grad_norm": 0.22201590240001678, "learning_rate": 4.298372744695787e-05, "loss": 0.8414767980575562, "step": 4478 }, { "epoch": 5.495705521472392, "grad_norm": 0.2152257263660431, "learning_rate": 4.298024053133348e-05, "loss": 0.9387240409851074, "step": 4479 }, { "epoch": 5.4969325153374236, "grad_norm": 0.3717319965362549, "learning_rate": 4.297675289097689e-05, "loss": 0.485925555229187, "step": 4480 }, { "epoch": 5.498159509202454, "grad_norm": 0.2502201199531555, "learning_rate": 4.2973264526028655e-05, "loss": 0.7712613344192505, "step": 4481 }, { "epoch": 5.499386503067485, "grad_norm": 0.26504847407341003, "learning_rate": 4.296977543662939e-05, "loss": 0.6931453347206116, "step": 4482 }, { "epoch": 5.500613496932515, "grad_norm": 0.25625959038734436, "learning_rate": 4.2966285622919726e-05, "loss": 0.7084206938743591, "step": 4483 }, { "epoch": 5.501840490797546, "grad_norm": 0.19561992585659027, "learning_rate": 4.2962795085040334e-05, "loss": 0.7299501895904541, "step": 4484 }, { "epoch": 5.5030674846625764, "grad_norm": 0.2359837293624878, "learning_rate": 4.29593038231319e-05, "loss": 0.7829495668411255, "step": 4485 }, { "epoch": 5.504294478527608, "grad_norm": 0.23209735751152039, "learning_rate": 4.2955811837335155e-05, "loss": 0.7482068538665771, "step": 4486 }, { "epoch": 5.505521472392638, "grad_norm": 0.271284818649292, "learning_rate": 4.295231912779084e-05, "loss": 0.8510841727256775, "step": 4487 }, { "epoch": 5.506748466257669, "grad_norm": 0.2652483284473419, "learning_rate": 4.294882569463975e-05, "loss": 0.7905210256576538, "step": 4488 }, { "epoch": 5.507975460122699, "grad_norm": 0.2354745715856552, "learning_rate": 4.294533153802268e-05, "loss": 0.7192255854606628, "step": 4489 }, { "epoch": 5.50920245398773, "grad_norm": 0.2299199253320694, "learning_rate": 4.2941836658080484e-05, "loss": 0.8197226524353027, "step": 4490 }, { "epoch": 5.5104294478527605, "grad_norm": 0.26797980070114136, "learning_rate": 4.293834105495402e-05, "loss": 0.7824395895004272, "step": 4491 }, { "epoch": 5.511656441717792, "grad_norm": 0.2641502618789673, "learning_rate": 4.2934844728784195e-05, "loss": 0.693508505821228, "step": 4492 }, { "epoch": 5.512883435582822, "grad_norm": 0.28856563568115234, "learning_rate": 4.2931347679711924e-05, "loss": 0.674440860748291, "step": 4493 }, { "epoch": 5.514110429447853, "grad_norm": 0.23551861941814423, "learning_rate": 4.2927849907878177e-05, "loss": 0.8138654232025146, "step": 4494 }, { "epoch": 5.515337423312883, "grad_norm": 0.2701793313026428, "learning_rate": 4.2924351413423925e-05, "loss": 0.7034265995025635, "step": 4495 }, { "epoch": 5.516564417177914, "grad_norm": 0.25847557187080383, "learning_rate": 4.292085219649019e-05, "loss": 0.7349539995193481, "step": 4496 }, { "epoch": 5.5177914110429445, "grad_norm": 0.1949193775653839, "learning_rate": 4.2917352257218026e-05, "loss": 0.9077062010765076, "step": 4497 }, { "epoch": 5.519018404907976, "grad_norm": 0.21946905553340912, "learning_rate": 4.291385159574848e-05, "loss": 0.8445423245429993, "step": 4498 }, { "epoch": 5.520245398773006, "grad_norm": 0.2664746642112732, "learning_rate": 4.291035021222267e-05, "loss": 0.6873661279678345, "step": 4499 }, { "epoch": 5.521472392638037, "grad_norm": 0.27072152495384216, "learning_rate": 4.2906848106781736e-05, "loss": 0.6815374493598938, "step": 4500 }, { "epoch": 5.522699386503067, "grad_norm": 0.25038066506385803, "learning_rate": 4.290334527956682e-05, "loss": 0.7966043949127197, "step": 4501 }, { "epoch": 5.523926380368098, "grad_norm": 0.1942310482263565, "learning_rate": 4.289984173071911e-05, "loss": 0.9771155118942261, "step": 4502 }, { "epoch": 5.5251533742331285, "grad_norm": 0.2653249204158783, "learning_rate": 4.289633746037984e-05, "loss": 0.7882168292999268, "step": 4503 }, { "epoch": 5.52638036809816, "grad_norm": 0.25219619274139404, "learning_rate": 4.289283246869025e-05, "loss": 0.8193211555480957, "step": 4504 }, { "epoch": 5.52760736196319, "grad_norm": 0.23878009617328644, "learning_rate": 4.2889326755791616e-05, "loss": 0.9045209884643555, "step": 4505 }, { "epoch": 5.528834355828221, "grad_norm": 0.2833469808101654, "learning_rate": 4.2885820321825245e-05, "loss": 0.6734360456466675, "step": 4506 }, { "epoch": 5.530061349693252, "grad_norm": 0.27409157156944275, "learning_rate": 4.288231316693245e-05, "loss": 0.6823060512542725, "step": 4507 }, { "epoch": 5.531288343558282, "grad_norm": 0.27057084441185, "learning_rate": 4.287880529125463e-05, "loss": 0.7715628147125244, "step": 4508 }, { "epoch": 5.5325153374233125, "grad_norm": 0.2921386957168579, "learning_rate": 4.287529669493317e-05, "loss": 0.650979220867157, "step": 4509 }, { "epoch": 5.533742331288344, "grad_norm": 0.2853836417198181, "learning_rate": 4.287178737810946e-05, "loss": 0.5215302109718323, "step": 4510 }, { "epoch": 5.534969325153375, "grad_norm": 0.23905985057353973, "learning_rate": 4.286827734092498e-05, "loss": 0.8378152847290039, "step": 4511 }, { "epoch": 5.536196319018405, "grad_norm": 0.255600243806839, "learning_rate": 4.286476658352121e-05, "loss": 0.6463440656661987, "step": 4512 }, { "epoch": 5.537423312883435, "grad_norm": 0.21846918761730194, "learning_rate": 4.286125510603964e-05, "loss": 0.7207092046737671, "step": 4513 }, { "epoch": 5.538650306748466, "grad_norm": 0.30224892497062683, "learning_rate": 4.285774290862182e-05, "loss": 0.6862341165542603, "step": 4514 }, { "epoch": 5.539877300613497, "grad_norm": 0.32200053334236145, "learning_rate": 4.285422999140932e-05, "loss": 0.7751246690750122, "step": 4515 }, { "epoch": 5.541104294478528, "grad_norm": 0.3200450837612152, "learning_rate": 4.2850716354543736e-05, "loss": 0.6827324628829956, "step": 4516 }, { "epoch": 5.542331288343558, "grad_norm": 0.2639163136482239, "learning_rate": 4.284720199816668e-05, "loss": 0.6084370613098145, "step": 4517 }, { "epoch": 5.543558282208589, "grad_norm": 0.3189186751842499, "learning_rate": 4.2843686922419814e-05, "loss": 0.6290724277496338, "step": 4518 }, { "epoch": 5.54478527607362, "grad_norm": 0.22699838876724243, "learning_rate": 4.2840171127444825e-05, "loss": 0.714242696762085, "step": 4519 }, { "epoch": 5.54601226993865, "grad_norm": 0.21778525412082672, "learning_rate": 4.283665461338341e-05, "loss": 0.8033387064933777, "step": 4520 }, { "epoch": 5.5472392638036805, "grad_norm": 0.2343481332063675, "learning_rate": 4.283313738037733e-05, "loss": 0.7119626402854919, "step": 4521 }, { "epoch": 5.548466257668712, "grad_norm": 0.22319075465202332, "learning_rate": 4.282961942856833e-05, "loss": 0.6886709928512573, "step": 4522 }, { "epoch": 5.549693251533743, "grad_norm": 0.21877166628837585, "learning_rate": 4.2826100758098235e-05, "loss": 0.893475353717804, "step": 4523 }, { "epoch": 5.550920245398773, "grad_norm": 0.20446333289146423, "learning_rate": 4.282258136910886e-05, "loss": 0.8932533264160156, "step": 4524 }, { "epoch": 5.552147239263804, "grad_norm": 0.36074140667915344, "learning_rate": 4.2819061261742056e-05, "loss": 0.7145532369613647, "step": 4525 }, { "epoch": 5.553374233128834, "grad_norm": 0.22311976552009583, "learning_rate": 4.281554043613972e-05, "loss": 0.771878182888031, "step": 4526 }, { "epoch": 5.554601226993865, "grad_norm": 0.3109515309333801, "learning_rate": 4.281201889244375e-05, "loss": 0.6617399454116821, "step": 4527 }, { "epoch": 5.555828220858896, "grad_norm": 0.2796854078769684, "learning_rate": 4.2808496630796106e-05, "loss": 0.5920553207397461, "step": 4528 }, { "epoch": 5.557055214723927, "grad_norm": 0.268600195646286, "learning_rate": 4.280497365133875e-05, "loss": 0.6353069543838501, "step": 4529 }, { "epoch": 5.558282208588957, "grad_norm": 0.2162739634513855, "learning_rate": 4.28014499542137e-05, "loss": 0.9508378505706787, "step": 4530 }, { "epoch": 5.559509202453988, "grad_norm": 0.31064194440841675, "learning_rate": 4.279792553956297e-05, "loss": 0.6537706851959229, "step": 4531 }, { "epoch": 5.560736196319018, "grad_norm": 0.23840585350990295, "learning_rate": 4.279440040752861e-05, "loss": 0.8263545632362366, "step": 4532 }, { "epoch": 5.561963190184049, "grad_norm": 0.32300952076911926, "learning_rate": 4.279087455825273e-05, "loss": 0.7456942200660706, "step": 4533 }, { "epoch": 5.56319018404908, "grad_norm": 0.3095055818557739, "learning_rate": 4.2787347991877436e-05, "loss": 0.6571251749992371, "step": 4534 }, { "epoch": 5.564417177914111, "grad_norm": 0.2611429691314697, "learning_rate": 4.278382070854487e-05, "loss": 0.7374638915061951, "step": 4535 }, { "epoch": 5.565644171779141, "grad_norm": 0.24062961339950562, "learning_rate": 4.278029270839723e-05, "loss": 0.6300645470619202, "step": 4536 }, { "epoch": 5.566871165644172, "grad_norm": 0.28626546263694763, "learning_rate": 4.277676399157669e-05, "loss": 0.6443771123886108, "step": 4537 }, { "epoch": 5.568098159509202, "grad_norm": 0.2284204661846161, "learning_rate": 4.2773234558225495e-05, "loss": 0.9271685481071472, "step": 4538 }, { "epoch": 5.569325153374233, "grad_norm": 0.26075294613838196, "learning_rate": 4.2769704408485904e-05, "loss": 0.6092112064361572, "step": 4539 }, { "epoch": 5.570552147239264, "grad_norm": 0.22198133170604706, "learning_rate": 4.276617354250021e-05, "loss": 0.8850064873695374, "step": 4540 }, { "epoch": 5.571779141104295, "grad_norm": 0.2331104874610901, "learning_rate": 4.2762641960410735e-05, "loss": 0.7148695588111877, "step": 4541 }, { "epoch": 5.573006134969325, "grad_norm": 0.23190724849700928, "learning_rate": 4.275910966235982e-05, "loss": 0.8437101244926453, "step": 4542 }, { "epoch": 5.574233128834356, "grad_norm": 0.2698463201522827, "learning_rate": 4.275557664848985e-05, "loss": 0.7093725204467773, "step": 4543 }, { "epoch": 5.575460122699386, "grad_norm": 0.21790768206119537, "learning_rate": 4.2752042918943214e-05, "loss": 0.7282576560974121, "step": 4544 }, { "epoch": 5.576687116564417, "grad_norm": 0.2629272937774658, "learning_rate": 4.274850847386237e-05, "loss": 0.5978923439979553, "step": 4545 }, { "epoch": 5.577914110429448, "grad_norm": 0.25276392698287964, "learning_rate": 4.274497331338976e-05, "loss": 0.8495100736618042, "step": 4546 }, { "epoch": 5.579141104294479, "grad_norm": 0.23966524004936218, "learning_rate": 4.27414374376679e-05, "loss": 0.7541409730911255, "step": 4547 }, { "epoch": 5.580368098159509, "grad_norm": 0.22494173049926758, "learning_rate": 4.2737900846839285e-05, "loss": 0.8320722579956055, "step": 4548 }, { "epoch": 5.58159509202454, "grad_norm": 0.23857176303863525, "learning_rate": 4.273436354104648e-05, "loss": 0.9135860204696655, "step": 4549 }, { "epoch": 5.58282208588957, "grad_norm": 0.25019216537475586, "learning_rate": 4.273082552043206e-05, "loss": 0.8747447729110718, "step": 4550 }, { "epoch": 5.584049079754601, "grad_norm": 0.25974729657173157, "learning_rate": 4.272728678513863e-05, "loss": 0.7484340667724609, "step": 4551 }, { "epoch": 5.585276073619632, "grad_norm": 0.2287992238998413, "learning_rate": 4.272374733530884e-05, "loss": 0.8247665762901306, "step": 4552 }, { "epoch": 5.586503067484663, "grad_norm": 0.25583747029304504, "learning_rate": 4.272020717108534e-05, "loss": 0.6688866019248962, "step": 4553 }, { "epoch": 5.587730061349693, "grad_norm": 0.2828013598918915, "learning_rate": 4.271666629261082e-05, "loss": 0.8413088321685791, "step": 4554 }, { "epoch": 5.588957055214724, "grad_norm": 0.33046117424964905, "learning_rate": 4.2713124700028016e-05, "loss": 0.7385681867599487, "step": 4555 }, { "epoch": 5.590184049079754, "grad_norm": 0.2267242819070816, "learning_rate": 4.270958239347968e-05, "loss": 0.7886542677879333, "step": 4556 }, { "epoch": 5.591411042944785, "grad_norm": 0.26380500197410583, "learning_rate": 4.270603937310859e-05, "loss": 0.8412954211235046, "step": 4557 }, { "epoch": 5.592638036809816, "grad_norm": 0.23312871158123016, "learning_rate": 4.270249563905754e-05, "loss": 0.7000341415405273, "step": 4558 }, { "epoch": 5.593865030674847, "grad_norm": 0.26565760374069214, "learning_rate": 4.269895119146939e-05, "loss": 0.8145960569381714, "step": 4559 }, { "epoch": 5.595092024539877, "grad_norm": 0.23740485310554504, "learning_rate": 4.269540603048699e-05, "loss": 0.9596184492111206, "step": 4560 }, { "epoch": 5.596319018404908, "grad_norm": 0.27669405937194824, "learning_rate": 4.2691860156253246e-05, "loss": 0.7757238149642944, "step": 4561 }, { "epoch": 5.597546012269938, "grad_norm": 0.24885298311710358, "learning_rate": 4.268831356891107e-05, "loss": 0.6992084383964539, "step": 4562 }, { "epoch": 5.598773006134969, "grad_norm": 0.2511221766471863, "learning_rate": 4.268476626860343e-05, "loss": 0.832524299621582, "step": 4563 }, { "epoch": 5.6, "grad_norm": 0.24569165706634521, "learning_rate": 4.26812182554733e-05, "loss": 0.7200828790664673, "step": 4564 }, { "epoch": 5.601226993865031, "grad_norm": 0.2072889357805252, "learning_rate": 4.267766952966369e-05, "loss": 0.7418338060379028, "step": 4565 }, { "epoch": 5.602453987730061, "grad_norm": 0.29334402084350586, "learning_rate": 4.267412009131764e-05, "loss": 0.7561975121498108, "step": 4566 }, { "epoch": 5.603680981595092, "grad_norm": 0.22010712325572968, "learning_rate": 4.267056994057822e-05, "loss": 0.8801463842391968, "step": 4567 }, { "epoch": 5.604907975460122, "grad_norm": 0.27823972702026367, "learning_rate": 4.266701907758852e-05, "loss": 0.6908450126647949, "step": 4568 }, { "epoch": 5.606134969325153, "grad_norm": 0.28323256969451904, "learning_rate": 4.266346750249167e-05, "loss": 0.601428747177124, "step": 4569 }, { "epoch": 5.6073619631901845, "grad_norm": 0.2353680580854416, "learning_rate": 4.265991521543082e-05, "loss": 0.733452320098877, "step": 4570 }, { "epoch": 5.608588957055215, "grad_norm": 0.23296202719211578, "learning_rate": 4.265636221654915e-05, "loss": 0.778853714466095, "step": 4571 }, { "epoch": 5.609815950920245, "grad_norm": 0.23400811851024628, "learning_rate": 4.2652808505989884e-05, "loss": 0.7721114158630371, "step": 4572 }, { "epoch": 5.611042944785276, "grad_norm": 0.1837013214826584, "learning_rate": 4.2649254083896255e-05, "loss": 0.8791821002960205, "step": 4573 }, { "epoch": 5.612269938650307, "grad_norm": 0.21819451451301575, "learning_rate": 4.2645698950411535e-05, "loss": 0.8911315202713013, "step": 4574 }, { "epoch": 5.613496932515337, "grad_norm": 0.23801374435424805, "learning_rate": 4.264214310567901e-05, "loss": 0.822864294052124, "step": 4575 }, { "epoch": 5.614723926380368, "grad_norm": 0.29216521978378296, "learning_rate": 4.2638586549842016e-05, "loss": 0.7699704766273499, "step": 4576 }, { "epoch": 5.615950920245399, "grad_norm": 0.24314425885677338, "learning_rate": 4.2635029283043906e-05, "loss": 0.7470192909240723, "step": 4577 }, { "epoch": 5.61717791411043, "grad_norm": 0.24363461136817932, "learning_rate": 4.263147130542806e-05, "loss": 0.8919646739959717, "step": 4578 }, { "epoch": 5.61840490797546, "grad_norm": 0.21763436496257782, "learning_rate": 4.26279126171379e-05, "loss": 0.6932550668716431, "step": 4579 }, { "epoch": 5.61963190184049, "grad_norm": 0.25499746203422546, "learning_rate": 4.262435321831685e-05, "loss": 0.9319256544113159, "step": 4580 }, { "epoch": 5.620858895705521, "grad_norm": 0.32315346598625183, "learning_rate": 4.2620793109108385e-05, "loss": 0.7348275184631348, "step": 4581 }, { "epoch": 5.6220858895705526, "grad_norm": 0.24252338707447052, "learning_rate": 4.2617232289656005e-05, "loss": 0.7612949013710022, "step": 4582 }, { "epoch": 5.623312883435583, "grad_norm": 0.2350257784128189, "learning_rate": 4.261367076010325e-05, "loss": 0.7498478889465332, "step": 4583 }, { "epoch": 5.624539877300613, "grad_norm": 0.2576235830783844, "learning_rate": 4.2610108520593655e-05, "loss": 0.7881041765213013, "step": 4584 }, { "epoch": 5.625766871165644, "grad_norm": 0.20481562614440918, "learning_rate": 4.2606545571270816e-05, "loss": 0.8211688995361328, "step": 4585 }, { "epoch": 5.626993865030675, "grad_norm": 0.21294893324375153, "learning_rate": 4.2602981912278337e-05, "loss": 0.7458100318908691, "step": 4586 }, { "epoch": 5.6282208588957054, "grad_norm": 0.25144046545028687, "learning_rate": 4.259941754375986e-05, "loss": 0.7825895547866821, "step": 4587 }, { "epoch": 5.629447852760737, "grad_norm": 0.28934046626091003, "learning_rate": 4.259585246585905e-05, "loss": 0.6138135194778442, "step": 4588 }, { "epoch": 5.630674846625767, "grad_norm": 0.24097739160060883, "learning_rate": 4.259228667871963e-05, "loss": 0.6723729372024536, "step": 4589 }, { "epoch": 5.631901840490798, "grad_norm": 0.2749398648738861, "learning_rate": 4.2588720182485296e-05, "loss": 0.6151736378669739, "step": 4590 }, { "epoch": 5.633128834355828, "grad_norm": 0.2967562675476074, "learning_rate": 4.258515297729983e-05, "loss": 0.7362946271896362, "step": 4591 }, { "epoch": 5.634355828220859, "grad_norm": 0.24714210629463196, "learning_rate": 4.258158506330698e-05, "loss": 0.7617048025131226, "step": 4592 }, { "epoch": 5.6355828220858895, "grad_norm": 0.30392736196517944, "learning_rate": 4.257801644065059e-05, "loss": 0.7937813401222229, "step": 4593 }, { "epoch": 5.636809815950921, "grad_norm": 0.24035987257957458, "learning_rate": 4.2574447109474494e-05, "loss": 0.85625821352005, "step": 4594 }, { "epoch": 5.638036809815951, "grad_norm": 0.23315492272377014, "learning_rate": 4.257087706992256e-05, "loss": 0.7050710916519165, "step": 4595 }, { "epoch": 5.639263803680982, "grad_norm": 0.20972983539104462, "learning_rate": 4.256730632213868e-05, "loss": 0.6095747947692871, "step": 4596 }, { "epoch": 5.640490797546012, "grad_norm": 0.24913839995861053, "learning_rate": 4.256373486626679e-05, "loss": 0.6395794153213501, "step": 4597 }, { "epoch": 5.641717791411043, "grad_norm": 0.2449091523885727, "learning_rate": 4.256016270245084e-05, "loss": 0.6214438676834106, "step": 4598 }, { "epoch": 5.6429447852760735, "grad_norm": 0.21890540421009064, "learning_rate": 4.255658983083481e-05, "loss": 0.8991913199424744, "step": 4599 }, { "epoch": 5.644171779141105, "grad_norm": 0.20097267627716064, "learning_rate": 4.255301625156273e-05, "loss": 0.6795922517776489, "step": 4600 }, { "epoch": 5.645398773006135, "grad_norm": 0.2898382544517517, "learning_rate": 4.254944196477862e-05, "loss": 0.8351344466209412, "step": 4601 }, { "epoch": 5.646625766871166, "grad_norm": 0.2052198201417923, "learning_rate": 4.2545866970626556e-05, "loss": 0.7626831531524658, "step": 4602 }, { "epoch": 5.647852760736196, "grad_norm": 0.28116166591644287, "learning_rate": 4.2542291269250635e-05, "loss": 0.7738326787948608, "step": 4603 }, { "epoch": 5.649079754601227, "grad_norm": 0.2103448063135147, "learning_rate": 4.253871486079498e-05, "loss": 0.8523114919662476, "step": 4604 }, { "epoch": 5.6503067484662575, "grad_norm": 0.29934409260749817, "learning_rate": 4.2535137745403766e-05, "loss": 0.724364161491394, "step": 4605 }, { "epoch": 5.651533742331289, "grad_norm": 0.30534783005714417, "learning_rate": 4.2531559923221154e-05, "loss": 0.7843092679977417, "step": 4606 }, { "epoch": 5.652760736196319, "grad_norm": 0.21455271542072296, "learning_rate": 4.252798139439136e-05, "loss": 0.8908126354217529, "step": 4607 }, { "epoch": 5.65398773006135, "grad_norm": 0.28568172454833984, "learning_rate": 4.2524402159058626e-05, "loss": 0.6853520274162292, "step": 4608 }, { "epoch": 5.65521472392638, "grad_norm": 0.2909097671508789, "learning_rate": 4.252082221736723e-05, "loss": 0.6870957612991333, "step": 4609 }, { "epoch": 5.656441717791411, "grad_norm": 0.16847050189971924, "learning_rate": 4.2517241569461456e-05, "loss": 0.8365292549133301, "step": 4610 }, { "epoch": 5.6576687116564415, "grad_norm": 0.2720094621181488, "learning_rate": 4.251366021548563e-05, "loss": 0.7233437895774841, "step": 4611 }, { "epoch": 5.658895705521473, "grad_norm": 0.33359214663505554, "learning_rate": 4.251007815558412e-05, "loss": 0.7572518587112427, "step": 4612 }, { "epoch": 5.660122699386503, "grad_norm": 0.28330710530281067, "learning_rate": 4.2506495389901304e-05, "loss": 0.7160791754722595, "step": 4613 }, { "epoch": 5.661349693251534, "grad_norm": 0.273395836353302, "learning_rate": 4.2502911918581576e-05, "loss": 0.6766534447669983, "step": 4614 }, { "epoch": 5.662576687116564, "grad_norm": 0.23286010324954987, "learning_rate": 4.2499327741769393e-05, "loss": 0.70229572057724, "step": 4615 }, { "epoch": 5.663803680981595, "grad_norm": 0.22633130848407745, "learning_rate": 4.249574285960922e-05, "loss": 0.6792296171188354, "step": 4616 }, { "epoch": 5.6650306748466255, "grad_norm": 0.2893160283565521, "learning_rate": 4.249215727224555e-05, "loss": 0.656683087348938, "step": 4617 }, { "epoch": 5.666257668711657, "grad_norm": 0.24007056653499603, "learning_rate": 4.2488570979822915e-05, "loss": 0.7636604309082031, "step": 4618 }, { "epoch": 5.667484662576687, "grad_norm": 0.2732355296611786, "learning_rate": 4.248498398248586e-05, "loss": 0.7416073679924011, "step": 4619 }, { "epoch": 5.668711656441718, "grad_norm": 0.20983189344406128, "learning_rate": 4.248139628037896e-05, "loss": 0.8697794675827026, "step": 4620 }, { "epoch": 5.669938650306748, "grad_norm": 0.2693086266517639, "learning_rate": 4.2477807873646845e-05, "loss": 0.7129133939743042, "step": 4621 }, { "epoch": 5.671165644171779, "grad_norm": 0.32160505652427673, "learning_rate": 4.2474218762434146e-05, "loss": 0.7136988639831543, "step": 4622 }, { "epoch": 5.6723926380368095, "grad_norm": 0.2214958667755127, "learning_rate": 4.2470628946885524e-05, "loss": 0.8249160051345825, "step": 4623 }, { "epoch": 5.673619631901841, "grad_norm": 0.2534981966018677, "learning_rate": 4.2467038427145673e-05, "loss": 0.6999915838241577, "step": 4624 }, { "epoch": 5.674846625766871, "grad_norm": 0.24870693683624268, "learning_rate": 4.246344720335932e-05, "loss": 0.7901906967163086, "step": 4625 }, { "epoch": 5.676073619631902, "grad_norm": 0.3676050007343292, "learning_rate": 4.2459855275671216e-05, "loss": 0.6193346381187439, "step": 4626 }, { "epoch": 5.677300613496932, "grad_norm": 0.29707765579223633, "learning_rate": 4.2456262644226146e-05, "loss": 0.7155962586402893, "step": 4627 }, { "epoch": 5.678527607361963, "grad_norm": 0.2773699462413788, "learning_rate": 4.245266930916891e-05, "loss": 0.6336665153503418, "step": 4628 }, { "epoch": 5.6797546012269935, "grad_norm": 0.27683401107788086, "learning_rate": 4.2449075270644365e-05, "loss": 0.7919055223464966, "step": 4629 }, { "epoch": 5.680981595092025, "grad_norm": 0.20457303524017334, "learning_rate": 4.2445480528797354e-05, "loss": 0.8953977227210999, "step": 4630 }, { "epoch": 5.682208588957055, "grad_norm": 0.2618420422077179, "learning_rate": 4.244188508377278e-05, "loss": 0.5991165637969971, "step": 4631 }, { "epoch": 5.683435582822086, "grad_norm": 0.2516672611236572, "learning_rate": 4.243828893571556e-05, "loss": 0.6972525119781494, "step": 4632 }, { "epoch": 5.684662576687117, "grad_norm": 0.30343499779701233, "learning_rate": 4.243469208477066e-05, "loss": 0.7633234262466431, "step": 4633 }, { "epoch": 5.685889570552147, "grad_norm": 0.24425473809242249, "learning_rate": 4.2431094531083035e-05, "loss": 0.5734155774116516, "step": 4634 }, { "epoch": 5.6871165644171775, "grad_norm": 0.24843205511569977, "learning_rate": 4.2427496274797704e-05, "loss": 0.656487226486206, "step": 4635 }, { "epoch": 5.688343558282209, "grad_norm": 0.358280748128891, "learning_rate": 4.242389731605971e-05, "loss": 0.5120719075202942, "step": 4636 }, { "epoch": 5.68957055214724, "grad_norm": 0.2223614752292633, "learning_rate": 4.2420297655014105e-05, "loss": 1.0972704887390137, "step": 4637 }, { "epoch": 5.69079754601227, "grad_norm": 0.2567724883556366, "learning_rate": 4.241669729180598e-05, "loss": 0.6269451379776001, "step": 4638 }, { "epoch": 5.6920245398773, "grad_norm": 0.25387123227119446, "learning_rate": 4.241309622658047e-05, "loss": 0.8532664179801941, "step": 4639 }, { "epoch": 5.693251533742331, "grad_norm": 0.26725831627845764, "learning_rate": 4.240949445948272e-05, "loss": 0.782257080078125, "step": 4640 }, { "epoch": 5.694478527607362, "grad_norm": 0.2820286750793457, "learning_rate": 4.2405891990657896e-05, "loss": 0.6953644156455994, "step": 4641 }, { "epoch": 5.695705521472393, "grad_norm": 0.2831324338912964, "learning_rate": 4.240228882025121e-05, "loss": 0.6524466276168823, "step": 4642 }, { "epoch": 5.696932515337423, "grad_norm": 0.24518398940563202, "learning_rate": 4.239868494840789e-05, "loss": 0.7360973358154297, "step": 4643 }, { "epoch": 5.698159509202454, "grad_norm": 0.2646332085132599, "learning_rate": 4.23950803752732e-05, "loss": 0.5699343085289001, "step": 4644 }, { "epoch": 5.699386503067485, "grad_norm": 0.2531006634235382, "learning_rate": 4.239147510099244e-05, "loss": 0.8355178236961365, "step": 4645 }, { "epoch": 5.700613496932515, "grad_norm": 0.2922840416431427, "learning_rate": 4.238786912571092e-05, "loss": 0.6169830560684204, "step": 4646 }, { "epoch": 5.7018404907975455, "grad_norm": 0.236654594540596, "learning_rate": 4.238426244957399e-05, "loss": 0.8200740814208984, "step": 4647 }, { "epoch": 5.703067484662577, "grad_norm": 0.22449180483818054, "learning_rate": 4.238065507272702e-05, "loss": 0.7692312002182007, "step": 4648 }, { "epoch": 5.704294478527608, "grad_norm": 0.3130163550376892, "learning_rate": 4.2377046995315406e-05, "loss": 0.6977909803390503, "step": 4649 }, { "epoch": 5.705521472392638, "grad_norm": 0.2289087176322937, "learning_rate": 4.2373438217484593e-05, "loss": 0.7709519267082214, "step": 4650 }, { "epoch": 5.706748466257669, "grad_norm": 0.2379102259874344, "learning_rate": 4.236982873938003e-05, "loss": 0.7127621173858643, "step": 4651 }, { "epoch": 5.707975460122699, "grad_norm": 0.2563347816467285, "learning_rate": 4.236621856114723e-05, "loss": 0.7106269598007202, "step": 4652 }, { "epoch": 5.70920245398773, "grad_norm": 0.27089840173721313, "learning_rate": 4.2362607682931677e-05, "loss": 0.7643658518791199, "step": 4653 }, { "epoch": 5.710429447852761, "grad_norm": 0.22239311039447784, "learning_rate": 4.2358996104878936e-05, "loss": 0.7556246519088745, "step": 4654 }, { "epoch": 5.711656441717792, "grad_norm": 0.2898857891559601, "learning_rate": 4.235538382713456e-05, "loss": 0.7976846694946289, "step": 4655 }, { "epoch": 5.712883435582822, "grad_norm": 0.2575312554836273, "learning_rate": 4.235177084984417e-05, "loss": 0.5683778524398804, "step": 4656 }, { "epoch": 5.714110429447853, "grad_norm": 0.25384512543678284, "learning_rate": 4.234815717315338e-05, "loss": 0.6800442337989807, "step": 4657 }, { "epoch": 5.715337423312883, "grad_norm": 0.2474393993616104, "learning_rate": 4.2344542797207856e-05, "loss": 0.775975227355957, "step": 4658 }, { "epoch": 5.716564417177914, "grad_norm": 0.26655322313308716, "learning_rate": 4.234092772215328e-05, "loss": 0.6028920412063599, "step": 4659 }, { "epoch": 5.717791411042945, "grad_norm": 0.2224249690771103, "learning_rate": 4.233731194813536e-05, "loss": 0.828079342842102, "step": 4660 }, { "epoch": 5.719018404907976, "grad_norm": 0.2989673316478729, "learning_rate": 4.233369547529985e-05, "loss": 0.7172046899795532, "step": 4661 }, { "epoch": 5.720245398773006, "grad_norm": 0.23285391926765442, "learning_rate": 4.233007830379251e-05, "loss": 0.7470971345901489, "step": 4662 }, { "epoch": 5.721472392638037, "grad_norm": 0.22372585535049438, "learning_rate": 4.232646043375914e-05, "loss": 0.7008240222930908, "step": 4663 }, { "epoch": 5.722699386503067, "grad_norm": 0.25632017850875854, "learning_rate": 4.232284186534557e-05, "loss": 0.6983224749565125, "step": 4664 }, { "epoch": 5.723926380368098, "grad_norm": 0.2629302740097046, "learning_rate": 4.231922259869765e-05, "loss": 0.7693007588386536, "step": 4665 }, { "epoch": 5.725153374233129, "grad_norm": 0.23130999505519867, "learning_rate": 4.231560263396126e-05, "loss": 0.7006580829620361, "step": 4666 }, { "epoch": 5.72638036809816, "grad_norm": 0.22068972885608673, "learning_rate": 4.231198197128231e-05, "loss": 0.9157099723815918, "step": 4667 }, { "epoch": 5.72760736196319, "grad_norm": 0.34262585639953613, "learning_rate": 4.2308360610806755e-05, "loss": 0.5286740064620972, "step": 4668 }, { "epoch": 5.728834355828221, "grad_norm": 0.2321358323097229, "learning_rate": 4.2304738552680533e-05, "loss": 0.8088266849517822, "step": 4669 }, { "epoch": 5.730061349693251, "grad_norm": 0.26999253034591675, "learning_rate": 4.2301115797049663e-05, "loss": 0.6208579540252686, "step": 4670 }, { "epoch": 5.731288343558282, "grad_norm": 0.28029853105545044, "learning_rate": 4.229749234406016e-05, "loss": 0.5271497368812561, "step": 4671 }, { "epoch": 5.732515337423313, "grad_norm": 0.2864380478858948, "learning_rate": 4.229386819385807e-05, "loss": 0.5888645052909851, "step": 4672 }, { "epoch": 5.733742331288344, "grad_norm": 0.26740244030952454, "learning_rate": 4.229024334658948e-05, "loss": 0.6927334666252136, "step": 4673 }, { "epoch": 5.734969325153374, "grad_norm": 0.19324131309986115, "learning_rate": 4.2286617802400493e-05, "loss": 0.7810473442077637, "step": 4674 }, { "epoch": 5.736196319018405, "grad_norm": 0.23157651722431183, "learning_rate": 4.2282991561437246e-05, "loss": 0.9771014451980591, "step": 4675 }, { "epoch": 5.737423312883435, "grad_norm": 0.22618380188941956, "learning_rate": 4.227936462384591e-05, "loss": 0.8645322322845459, "step": 4676 }, { "epoch": 5.738650306748466, "grad_norm": 0.2261282205581665, "learning_rate": 4.2275736989772653e-05, "loss": 1.0513226985931396, "step": 4677 }, { "epoch": 5.739877300613497, "grad_norm": 0.3252642750740051, "learning_rate": 4.227210865936372e-05, "loss": 0.7439576387405396, "step": 4678 }, { "epoch": 5.741104294478528, "grad_norm": 0.26290804147720337, "learning_rate": 4.2268479632765344e-05, "loss": 0.6705437898635864, "step": 4679 }, { "epoch": 5.742331288343558, "grad_norm": 0.21934379637241364, "learning_rate": 4.2264849910123806e-05, "loss": 0.6205381155014038, "step": 4680 }, { "epoch": 5.743558282208589, "grad_norm": 0.2149769514799118, "learning_rate": 4.226121949158541e-05, "loss": 0.8616456985473633, "step": 4681 }, { "epoch": 5.744785276073619, "grad_norm": 0.216855987906456, "learning_rate": 4.225758837729649e-05, "loss": 1.00482976436615, "step": 4682 }, { "epoch": 5.74601226993865, "grad_norm": 0.2307720184326172, "learning_rate": 4.2253956567403396e-05, "loss": 0.8063957691192627, "step": 4683 }, { "epoch": 5.747239263803681, "grad_norm": 0.39356130361557007, "learning_rate": 4.2250324062052526e-05, "loss": 0.5574277639389038, "step": 4684 }, { "epoch": 5.748466257668712, "grad_norm": 0.30874544382095337, "learning_rate": 4.2246690861390294e-05, "loss": 0.6573870182037354, "step": 4685 }, { "epoch": 5.749693251533742, "grad_norm": 0.31612712144851685, "learning_rate": 4.224305696556314e-05, "loss": 0.6229354739189148, "step": 4686 }, { "epoch": 5.750920245398773, "grad_norm": 0.19854973256587982, "learning_rate": 4.223942237471754e-05, "loss": 0.9353561401367188, "step": 4687 }, { "epoch": 5.752147239263803, "grad_norm": 0.28480494022369385, "learning_rate": 4.223578708899999e-05, "loss": 0.8030149936676025, "step": 4688 }, { "epoch": 5.7533742331288344, "grad_norm": 0.20810841023921967, "learning_rate": 4.223215110855703e-05, "loss": 0.8249931931495667, "step": 4689 }, { "epoch": 5.754601226993865, "grad_norm": 0.24928154051303864, "learning_rate": 4.2228514433535195e-05, "loss": 0.7109832167625427, "step": 4690 }, { "epoch": 5.755828220858896, "grad_norm": 0.33074018359184265, "learning_rate": 4.222487706408108e-05, "loss": 0.5992707014083862, "step": 4691 }, { "epoch": 5.757055214723926, "grad_norm": 0.2362688034772873, "learning_rate": 4.222123900034131e-05, "loss": 0.7900612354278564, "step": 4692 }, { "epoch": 5.758282208588957, "grad_norm": 0.30959588289260864, "learning_rate": 4.22176002424625e-05, "loss": 0.5445448160171509, "step": 4693 }, { "epoch": 5.759509202453987, "grad_norm": 0.22028329968452454, "learning_rate": 4.221396079059134e-05, "loss": 0.8731036186218262, "step": 4694 }, { "epoch": 5.7607361963190185, "grad_norm": 0.2592747211456299, "learning_rate": 4.22103206448745e-05, "loss": 0.6143993735313416, "step": 4695 }, { "epoch": 5.76196319018405, "grad_norm": 0.21921710669994354, "learning_rate": 4.220667980545874e-05, "loss": 0.790494441986084, "step": 4696 }, { "epoch": 5.76319018404908, "grad_norm": 0.23854628205299377, "learning_rate": 4.220303827249078e-05, "loss": 0.7869687676429749, "step": 4697 }, { "epoch": 5.76441717791411, "grad_norm": 0.26182615756988525, "learning_rate": 4.219939604611741e-05, "loss": 0.66682368516922, "step": 4698 }, { "epoch": 5.765644171779141, "grad_norm": 0.3070499897003174, "learning_rate": 4.219575312648545e-05, "loss": 0.6030674576759338, "step": 4699 }, { "epoch": 5.766871165644172, "grad_norm": 0.24962463974952698, "learning_rate": 4.2192109513741715e-05, "loss": 0.7564529180526733, "step": 4700 }, { "epoch": 5.7680981595092025, "grad_norm": 0.20591627061367035, "learning_rate": 4.218846520803309e-05, "loss": 0.9477980136871338, "step": 4701 }, { "epoch": 5.769325153374233, "grad_norm": 0.22184781730175018, "learning_rate": 4.218482020950645e-05, "loss": 0.7685465216636658, "step": 4702 }, { "epoch": 5.770552147239264, "grad_norm": 0.3077123463153839, "learning_rate": 4.218117451830872e-05, "loss": 0.6065991520881653, "step": 4703 }, { "epoch": 5.771779141104295, "grad_norm": 0.28226956725120544, "learning_rate": 4.2177528134586844e-05, "loss": 0.7860382795333862, "step": 4704 }, { "epoch": 5.773006134969325, "grad_norm": 0.27476727962493896, "learning_rate": 4.2173881058487795e-05, "loss": 0.8154170513153076, "step": 4705 }, { "epoch": 5.774233128834355, "grad_norm": 0.24517324566841125, "learning_rate": 4.217023329015859e-05, "loss": 0.569506049156189, "step": 4706 }, { "epoch": 5.7754601226993865, "grad_norm": 0.21689598262310028, "learning_rate": 4.216658482974626e-05, "loss": 0.8218492269515991, "step": 4707 }, { "epoch": 5.776687116564418, "grad_norm": 0.3207787871360779, "learning_rate": 4.216293567739784e-05, "loss": 0.7192443609237671, "step": 4708 }, { "epoch": 5.777914110429448, "grad_norm": 0.22887039184570312, "learning_rate": 4.215928583326045e-05, "loss": 0.7045179605484009, "step": 4709 }, { "epoch": 5.779141104294479, "grad_norm": 0.26709726452827454, "learning_rate": 4.215563529748118e-05, "loss": 0.7944498658180237, "step": 4710 }, { "epoch": 5.780368098159509, "grad_norm": 0.328813761472702, "learning_rate": 4.215198407020718e-05, "loss": 0.5631619691848755, "step": 4711 }, { "epoch": 5.78159509202454, "grad_norm": 0.20559196174144745, "learning_rate": 4.2148332151585625e-05, "loss": 0.8070002198219299, "step": 4712 }, { "epoch": 5.7828220858895705, "grad_norm": 0.22692909836769104, "learning_rate": 4.21446795417637e-05, "loss": 0.7945940494537354, "step": 4713 }, { "epoch": 5.784049079754602, "grad_norm": 0.21604566276073456, "learning_rate": 4.214102624088865e-05, "loss": 0.710986852645874, "step": 4714 }, { "epoch": 5.785276073619632, "grad_norm": 0.28958532214164734, "learning_rate": 4.213737224910771e-05, "loss": 0.6460971236228943, "step": 4715 }, { "epoch": 5.786503067484663, "grad_norm": 0.29409539699554443, "learning_rate": 4.2133717566568174e-05, "loss": 0.576470673084259, "step": 4716 }, { "epoch": 5.787730061349693, "grad_norm": 0.2418426275253296, "learning_rate": 4.2130062193417366e-05, "loss": 0.888608455657959, "step": 4717 }, { "epoch": 5.788957055214724, "grad_norm": 0.2294149398803711, "learning_rate": 4.212640612980259e-05, "loss": 0.7700309157371521, "step": 4718 }, { "epoch": 5.7901840490797545, "grad_norm": 0.2245788723230362, "learning_rate": 4.212274937587123e-05, "loss": 0.8842394351959229, "step": 4719 }, { "epoch": 5.791411042944786, "grad_norm": 0.23178865015506744, "learning_rate": 4.211909193177069e-05, "loss": 0.8022234439849854, "step": 4720 }, { "epoch": 5.792638036809816, "grad_norm": 0.24220317602157593, "learning_rate": 4.2115433797648375e-05, "loss": 0.7560256123542786, "step": 4721 }, { "epoch": 5.793865030674847, "grad_norm": 0.2862244248390198, "learning_rate": 4.211177497365173e-05, "loss": 0.6843622326850891, "step": 4722 }, { "epoch": 5.795092024539877, "grad_norm": 0.296661913394928, "learning_rate": 4.210811545992825e-05, "loss": 0.6364086270332336, "step": 4723 }, { "epoch": 5.796319018404908, "grad_norm": 0.2520211338996887, "learning_rate": 4.210445525662542e-05, "loss": 0.8943976163864136, "step": 4724 }, { "epoch": 5.7975460122699385, "grad_norm": 0.2841196358203888, "learning_rate": 4.2100794363890794e-05, "loss": 0.8411802053451538, "step": 4725 }, { "epoch": 5.79877300613497, "grad_norm": 0.2599579095840454, "learning_rate": 4.2097132781871903e-05, "loss": 0.6636034846305847, "step": 4726 }, { "epoch": 5.8, "grad_norm": 0.23766109347343445, "learning_rate": 4.209347051071637e-05, "loss": 0.9045970439910889, "step": 4727 }, { "epoch": 5.801226993865031, "grad_norm": 0.23977024853229523, "learning_rate": 4.208980755057178e-05, "loss": 0.8334743976593018, "step": 4728 }, { "epoch": 5.802453987730061, "grad_norm": 0.22566089034080505, "learning_rate": 4.2086143901585794e-05, "loss": 0.8889467716217041, "step": 4729 }, { "epoch": 5.803680981595092, "grad_norm": 0.25459468364715576, "learning_rate": 4.2082479563906084e-05, "loss": 0.8120489716529846, "step": 4730 }, { "epoch": 5.8049079754601225, "grad_norm": 0.2256019413471222, "learning_rate": 4.2078814537680346e-05, "loss": 0.8439031839370728, "step": 4731 }, { "epoch": 5.806134969325154, "grad_norm": 0.26179826259613037, "learning_rate": 4.207514882305629e-05, "loss": 0.840933084487915, "step": 4732 }, { "epoch": 5.807361963190184, "grad_norm": 0.3043256103992462, "learning_rate": 4.2071482420181694e-05, "loss": 0.6998425722122192, "step": 4733 }, { "epoch": 5.808588957055215, "grad_norm": 0.22988753020763397, "learning_rate": 4.206781532920433e-05, "loss": 0.805991530418396, "step": 4734 }, { "epoch": 5.809815950920245, "grad_norm": 0.2552032768726349, "learning_rate": 4.2064147550272004e-05, "loss": 0.7176180481910706, "step": 4735 }, { "epoch": 5.811042944785276, "grad_norm": 0.4213991165161133, "learning_rate": 4.2060479083532564e-05, "loss": 0.480613648891449, "step": 4736 }, { "epoch": 5.8122699386503065, "grad_norm": 0.365707129240036, "learning_rate": 4.205680992913387e-05, "loss": 0.5840508937835693, "step": 4737 }, { "epoch": 5.813496932515338, "grad_norm": 0.276810884475708, "learning_rate": 4.205314008722381e-05, "loss": 0.7622864246368408, "step": 4738 }, { "epoch": 5.814723926380368, "grad_norm": 0.27330639958381653, "learning_rate": 4.204946955795032e-05, "loss": 0.7021960020065308, "step": 4739 }, { "epoch": 5.815950920245399, "grad_norm": 0.23286332190036774, "learning_rate": 4.204579834146133e-05, "loss": 0.8390681147575378, "step": 4740 }, { "epoch": 5.817177914110429, "grad_norm": 0.2386215180158615, "learning_rate": 4.2042126437904825e-05, "loss": 0.6733173131942749, "step": 4741 }, { "epoch": 5.81840490797546, "grad_norm": 0.2183920443058014, "learning_rate": 4.2038453847428815e-05, "loss": 0.8550019860267639, "step": 4742 }, { "epoch": 5.8196319018404905, "grad_norm": 0.24012814462184906, "learning_rate": 4.203478057018132e-05, "loss": 0.7503499984741211, "step": 4743 }, { "epoch": 5.820858895705522, "grad_norm": 0.30253520607948303, "learning_rate": 4.203110660631041e-05, "loss": 0.7214706540107727, "step": 4744 }, { "epoch": 5.822085889570552, "grad_norm": 0.2959848940372467, "learning_rate": 4.202743195596417e-05, "loss": 0.5839005708694458, "step": 4745 }, { "epoch": 5.823312883435583, "grad_norm": 0.2255070060491562, "learning_rate": 4.2023756619290706e-05, "loss": 0.741228461265564, "step": 4746 }, { "epoch": 5.824539877300613, "grad_norm": 0.2795320749282837, "learning_rate": 4.202008059643817e-05, "loss": 0.8053776621818542, "step": 4747 }, { "epoch": 5.825766871165644, "grad_norm": 0.22639010846614838, "learning_rate": 4.201640388755472e-05, "loss": 0.8887422680854797, "step": 4748 }, { "epoch": 5.8269938650306745, "grad_norm": 0.22909030318260193, "learning_rate": 4.201272649278856e-05, "loss": 0.9372576475143433, "step": 4749 }, { "epoch": 5.828220858895706, "grad_norm": 0.28644856810569763, "learning_rate": 4.200904841228792e-05, "loss": 0.7379813194274902, "step": 4750 }, { "epoch": 5.829447852760736, "grad_norm": 0.22512522339820862, "learning_rate": 4.2005369646201054e-05, "loss": 0.7674429416656494, "step": 4751 }, { "epoch": 5.830674846625767, "grad_norm": 0.20439818501472473, "learning_rate": 4.200169019467624e-05, "loss": 0.8636170625686646, "step": 4752 }, { "epoch": 5.831901840490797, "grad_norm": 0.22481714189052582, "learning_rate": 4.1998010057861784e-05, "loss": 0.7875722050666809, "step": 4753 }, { "epoch": 5.833128834355828, "grad_norm": 0.18159973621368408, "learning_rate": 4.199432923590601e-05, "loss": 0.953383207321167, "step": 4754 }, { "epoch": 5.8343558282208585, "grad_norm": 0.22831496596336365, "learning_rate": 4.199064772895731e-05, "loss": 0.7597968578338623, "step": 4755 }, { "epoch": 5.83558282208589, "grad_norm": 0.25749051570892334, "learning_rate": 4.198696553716406e-05, "loss": 0.7274248600006104, "step": 4756 }, { "epoch": 5.83680981595092, "grad_norm": 0.280661404132843, "learning_rate": 4.1983282660674666e-05, "loss": 0.6445226669311523, "step": 4757 }, { "epoch": 5.838036809815951, "grad_norm": 0.25225239992141724, "learning_rate": 4.197959909963759e-05, "loss": 0.8394676446914673, "step": 4758 }, { "epoch": 5.839263803680982, "grad_norm": 0.25918155908584595, "learning_rate": 4.19759148542013e-05, "loss": 0.7788535356521606, "step": 4759 }, { "epoch": 5.840490797546012, "grad_norm": 0.20408977568149567, "learning_rate": 4.19722299245143e-05, "loss": 0.864876389503479, "step": 4760 }, { "epoch": 5.8417177914110425, "grad_norm": 0.25206130743026733, "learning_rate": 4.1968544310725116e-05, "loss": 0.9268850088119507, "step": 4761 }, { "epoch": 5.842944785276074, "grad_norm": 0.22200430929660797, "learning_rate": 4.196485801298231e-05, "loss": 0.7305852770805359, "step": 4762 }, { "epoch": 5.844171779141105, "grad_norm": 0.3176657557487488, "learning_rate": 4.1961171031434465e-05, "loss": 0.5972616076469421, "step": 4763 }, { "epoch": 5.845398773006135, "grad_norm": 0.24420806765556335, "learning_rate": 4.1957483366230185e-05, "loss": 0.7662495374679565, "step": 4764 }, { "epoch": 5.846625766871165, "grad_norm": 0.19726501405239105, "learning_rate": 4.1953795017518116e-05, "loss": 0.7565969228744507, "step": 4765 }, { "epoch": 5.847852760736196, "grad_norm": 0.2217361330986023, "learning_rate": 4.195010598544693e-05, "loss": 0.8887973427772522, "step": 4766 }, { "epoch": 5.849079754601227, "grad_norm": 0.21542231738567352, "learning_rate": 4.1946416270165305e-05, "loss": 0.8374341726303101, "step": 4767 }, { "epoch": 5.850306748466258, "grad_norm": 0.34109607338905334, "learning_rate": 4.194272587182198e-05, "loss": 0.7800469398498535, "step": 4768 }, { "epoch": 5.851533742331288, "grad_norm": 0.2661769688129425, "learning_rate": 4.19390347905657e-05, "loss": 0.8205044269561768, "step": 4769 }, { "epoch": 5.852760736196319, "grad_norm": 0.22706808149814606, "learning_rate": 4.193534302654523e-05, "loss": 0.9298103451728821, "step": 4770 }, { "epoch": 5.85398773006135, "grad_norm": 0.2525346577167511, "learning_rate": 4.1931650579909385e-05, "loss": 0.8754875659942627, "step": 4771 }, { "epoch": 5.85521472392638, "grad_norm": 0.26242712140083313, "learning_rate": 4.1927957450807e-05, "loss": 0.7422844171524048, "step": 4772 }, { "epoch": 5.856441717791411, "grad_norm": 0.31527191400527954, "learning_rate": 4.1924263639386927e-05, "loss": 0.6126701831817627, "step": 4773 }, { "epoch": 5.857668711656442, "grad_norm": 0.21247953176498413, "learning_rate": 4.192056914579806e-05, "loss": 0.8114482164382935, "step": 4774 }, { "epoch": 5.858895705521473, "grad_norm": 0.30129173398017883, "learning_rate": 4.1916873970189306e-05, "loss": 0.6845991015434265, "step": 4775 }, { "epoch": 5.860122699386503, "grad_norm": 0.19309750199317932, "learning_rate": 4.1913178112709616e-05, "loss": 0.7914758920669556, "step": 4776 }, { "epoch": 5.861349693251534, "grad_norm": 0.28801971673965454, "learning_rate": 4.190948157350795e-05, "loss": 0.649480938911438, "step": 4777 }, { "epoch": 5.862576687116564, "grad_norm": 0.30674463510513306, "learning_rate": 4.190578435273331e-05, "loss": 0.6190224885940552, "step": 4778 }, { "epoch": 5.863803680981595, "grad_norm": 0.2142259031534195, "learning_rate": 4.190208645053472e-05, "loss": 0.7073129415512085, "step": 4779 }, { "epoch": 5.865030674846626, "grad_norm": 0.2754298746585846, "learning_rate": 4.189838786706123e-05, "loss": 0.73399418592453, "step": 4780 }, { "epoch": 5.866257668711657, "grad_norm": 0.20740190148353577, "learning_rate": 4.189468860246192e-05, "loss": 0.8814070224761963, "step": 4781 }, { "epoch": 5.867484662576687, "grad_norm": 0.2207803726196289, "learning_rate": 4.18909886568859e-05, "loss": 0.7613147497177124, "step": 4782 }, { "epoch": 5.868711656441718, "grad_norm": 0.24392226338386536, "learning_rate": 4.18872880304823e-05, "loss": 0.7378922700881958, "step": 4783 }, { "epoch": 5.869938650306748, "grad_norm": 0.2750208377838135, "learning_rate": 4.1883586723400284e-05, "loss": 0.7565860748291016, "step": 4784 }, { "epoch": 5.871165644171779, "grad_norm": 0.27438434958457947, "learning_rate": 4.187988473578904e-05, "loss": 0.6173732280731201, "step": 4785 }, { "epoch": 5.87239263803681, "grad_norm": 0.24064888060092926, "learning_rate": 4.1876182067797786e-05, "loss": 0.8143079876899719, "step": 4786 }, { "epoch": 5.873619631901841, "grad_norm": 0.24556002020835876, "learning_rate": 4.187247871957577e-05, "loss": 0.651939868927002, "step": 4787 }, { "epoch": 5.874846625766871, "grad_norm": 0.2328948676586151, "learning_rate": 4.186877469127225e-05, "loss": 0.8244158625602722, "step": 4788 }, { "epoch": 5.876073619631902, "grad_norm": 0.21914350986480713, "learning_rate": 4.1865069983036544e-05, "loss": 0.7897207736968994, "step": 4789 }, { "epoch": 5.877300613496932, "grad_norm": 0.21395082771778107, "learning_rate": 4.186136459501796e-05, "loss": 0.823290228843689, "step": 4790 }, { "epoch": 5.8785276073619634, "grad_norm": 0.23528161644935608, "learning_rate": 4.185765852736585e-05, "loss": 0.8716672658920288, "step": 4791 }, { "epoch": 5.879754601226994, "grad_norm": 0.28866612911224365, "learning_rate": 4.185395178022962e-05, "loss": 0.8131871223449707, "step": 4792 }, { "epoch": 5.880981595092025, "grad_norm": 0.2600838541984558, "learning_rate": 4.1850244353758664e-05, "loss": 0.682589054107666, "step": 4793 }, { "epoch": 5.882208588957055, "grad_norm": 0.35595425963401794, "learning_rate": 4.1846536248102406e-05, "loss": 0.6243711709976196, "step": 4794 }, { "epoch": 5.883435582822086, "grad_norm": 0.24508342146873474, "learning_rate": 4.184282746341032e-05, "loss": 0.8113642334938049, "step": 4795 }, { "epoch": 5.884662576687116, "grad_norm": 0.20722322165966034, "learning_rate": 4.18391179998319e-05, "loss": 0.8201395273208618, "step": 4796 }, { "epoch": 5.8858895705521475, "grad_norm": 0.21217510104179382, "learning_rate": 4.183540785751666e-05, "loss": 0.725237250328064, "step": 4797 }, { "epoch": 5.887116564417178, "grad_norm": 0.27331528067588806, "learning_rate": 4.183169703661415e-05, "loss": 0.7457935810089111, "step": 4798 }, { "epoch": 5.888343558282209, "grad_norm": 0.27120500802993774, "learning_rate": 4.1827985537273926e-05, "loss": 0.7159783244132996, "step": 4799 }, { "epoch": 5.889570552147239, "grad_norm": 0.2270524650812149, "learning_rate": 4.1824273359645614e-05, "loss": 0.7578986883163452, "step": 4800 }, { "epoch": 5.89079754601227, "grad_norm": 0.3244016766548157, "learning_rate": 4.182056050387882e-05, "loss": 0.5681353807449341, "step": 4801 }, { "epoch": 5.8920245398773, "grad_norm": 0.20430609583854675, "learning_rate": 4.18168469701232e-05, "loss": 0.7987333536148071, "step": 4802 }, { "epoch": 5.8932515337423315, "grad_norm": 0.27577972412109375, "learning_rate": 4.1813132758528455e-05, "loss": 0.6343237161636353, "step": 4803 }, { "epoch": 5.894478527607362, "grad_norm": 0.30660662055015564, "learning_rate": 4.180941786924427e-05, "loss": 0.732053279876709, "step": 4804 }, { "epoch": 5.895705521472393, "grad_norm": 0.2695466876029968, "learning_rate": 4.18057023024204e-05, "loss": 0.7613376975059509, "step": 4805 }, { "epoch": 5.896932515337423, "grad_norm": 0.2261434644460678, "learning_rate": 4.18019860582066e-05, "loss": 0.82884681224823, "step": 4806 }, { "epoch": 5.898159509202454, "grad_norm": 0.2568039000034332, "learning_rate": 4.179826913675266e-05, "loss": 0.6261295080184937, "step": 4807 }, { "epoch": 5.899386503067484, "grad_norm": 0.28282737731933594, "learning_rate": 4.17945515382084e-05, "loss": 0.627068042755127, "step": 4808 }, { "epoch": 5.9006134969325155, "grad_norm": 0.33293455839157104, "learning_rate": 4.179083326272367e-05, "loss": 0.7435364723205566, "step": 4809 }, { "epoch": 5.901840490797546, "grad_norm": 0.25140145421028137, "learning_rate": 4.178711431044834e-05, "loss": 0.6609475612640381, "step": 4810 }, { "epoch": 5.903067484662577, "grad_norm": 0.22663168609142303, "learning_rate": 4.178339468153231e-05, "loss": 0.8431714773178101, "step": 4811 }, { "epoch": 5.904294478527607, "grad_norm": 0.2638488709926605, "learning_rate": 4.1779674376125514e-05, "loss": 0.7532932162284851, "step": 4812 }, { "epoch": 5.905521472392638, "grad_norm": 0.23689812421798706, "learning_rate": 4.177595339437789e-05, "loss": 0.7473230361938477, "step": 4813 }, { "epoch": 5.906748466257668, "grad_norm": 0.2621968686580658, "learning_rate": 4.177223173643944e-05, "loss": 0.6533206701278687, "step": 4814 }, { "epoch": 5.9079754601226995, "grad_norm": 0.2577391266822815, "learning_rate": 4.176850940246016e-05, "loss": 0.6465920805931091, "step": 4815 }, { "epoch": 5.90920245398773, "grad_norm": 0.3159538507461548, "learning_rate": 4.1764786392590086e-05, "loss": 0.5570228695869446, "step": 4816 }, { "epoch": 5.910429447852761, "grad_norm": 0.31404730677604675, "learning_rate": 4.1761062706979294e-05, "loss": 0.5846728086471558, "step": 4817 }, { "epoch": 5.911656441717791, "grad_norm": 0.31783950328826904, "learning_rate": 4.1757338345777875e-05, "loss": 0.6640911102294922, "step": 4818 }, { "epoch": 5.912883435582822, "grad_norm": 0.280892938375473, "learning_rate": 4.175361330913593e-05, "loss": 0.7469580173492432, "step": 4819 }, { "epoch": 5.914110429447852, "grad_norm": 0.333584725856781, "learning_rate": 4.174988759720362e-05, "loss": 0.6270435452461243, "step": 4820 }, { "epoch": 5.9153374233128835, "grad_norm": 0.24127255380153656, "learning_rate": 4.174616121013111e-05, "loss": 0.8237810134887695, "step": 4821 }, { "epoch": 5.916564417177915, "grad_norm": 0.22907480597496033, "learning_rate": 4.174243414806861e-05, "loss": 0.6752328276634216, "step": 4822 }, { "epoch": 5.917791411042945, "grad_norm": 0.2258826494216919, "learning_rate": 4.173870641116633e-05, "loss": 0.8654752969741821, "step": 4823 }, { "epoch": 5.919018404907975, "grad_norm": 0.2351427674293518, "learning_rate": 4.1734977999574545e-05, "loss": 0.8121424913406372, "step": 4824 }, { "epoch": 5.920245398773006, "grad_norm": 0.22304056584835052, "learning_rate": 4.1731248913443524e-05, "loss": 0.7697080373764038, "step": 4825 }, { "epoch": 5.921472392638037, "grad_norm": 0.23707669973373413, "learning_rate": 4.1727519152923574e-05, "loss": 0.6596053242683411, "step": 4826 }, { "epoch": 5.9226993865030675, "grad_norm": 0.21200305223464966, "learning_rate": 4.172378871816504e-05, "loss": 0.7409518361091614, "step": 4827 }, { "epoch": 5.923926380368098, "grad_norm": 0.21026159822940826, "learning_rate": 4.172005760931827e-05, "loss": 0.8097739219665527, "step": 4828 }, { "epoch": 5.925153374233129, "grad_norm": 0.27456673979759216, "learning_rate": 4.171632582653367e-05, "loss": 0.8611752986907959, "step": 4829 }, { "epoch": 5.92638036809816, "grad_norm": 0.2498229742050171, "learning_rate": 4.171259336996166e-05, "loss": 0.7193318605422974, "step": 4830 }, { "epoch": 5.92760736196319, "grad_norm": 0.25401365756988525, "learning_rate": 4.170886023975267e-05, "loss": 0.6831789016723633, "step": 4831 }, { "epoch": 5.92883435582822, "grad_norm": 0.2750152051448822, "learning_rate": 4.170512643605718e-05, "loss": 0.6668494939804077, "step": 4832 }, { "epoch": 5.9300613496932515, "grad_norm": 0.20772086083889008, "learning_rate": 4.1701391959025694e-05, "loss": 0.6557914018630981, "step": 4833 }, { "epoch": 5.931288343558283, "grad_norm": 0.2517245411872864, "learning_rate": 4.169765680880873e-05, "loss": 0.843478798866272, "step": 4834 }, { "epoch": 5.932515337423313, "grad_norm": 0.2451508343219757, "learning_rate": 4.1693920985556836e-05, "loss": 0.8805540800094604, "step": 4835 }, { "epoch": 5.933742331288344, "grad_norm": 0.2813839018344879, "learning_rate": 4.16901844894206e-05, "loss": 0.7307500839233398, "step": 4836 }, { "epoch": 5.934969325153374, "grad_norm": 0.2847899794578552, "learning_rate": 4.168644732055064e-05, "loss": 0.6488633155822754, "step": 4837 }, { "epoch": 5.936196319018405, "grad_norm": 0.1932392120361328, "learning_rate": 4.168270947909757e-05, "loss": 0.8584529161453247, "step": 4838 }, { "epoch": 5.9374233128834355, "grad_norm": 0.2770220935344696, "learning_rate": 4.167897096521206e-05, "loss": 0.697378396987915, "step": 4839 }, { "epoch": 5.938650306748467, "grad_norm": 0.2662948966026306, "learning_rate": 4.167523177904481e-05, "loss": 0.6735595464706421, "step": 4840 }, { "epoch": 5.939877300613497, "grad_norm": 0.2435380220413208, "learning_rate": 4.167149192074652e-05, "loss": 0.8382214307785034, "step": 4841 }, { "epoch": 5.941104294478528, "grad_norm": 0.2512197494506836, "learning_rate": 4.166775139046793e-05, "loss": 0.8619201183319092, "step": 4842 }, { "epoch": 5.942331288343558, "grad_norm": 0.21491719782352448, "learning_rate": 4.166401018835984e-05, "loss": 0.6187548041343689, "step": 4843 }, { "epoch": 5.943558282208589, "grad_norm": 0.2665027379989624, "learning_rate": 4.166026831457302e-05, "loss": 0.535519540309906, "step": 4844 }, { "epoch": 5.9447852760736195, "grad_norm": 0.21720539033412933, "learning_rate": 4.165652576925829e-05, "loss": 0.9344935417175293, "step": 4845 }, { "epoch": 5.946012269938651, "grad_norm": 0.31934207677841187, "learning_rate": 4.165278255256652e-05, "loss": 0.528060793876648, "step": 4846 }, { "epoch": 5.947239263803681, "grad_norm": 0.274192214012146, "learning_rate": 4.164903866464858e-05, "loss": 0.5254701375961304, "step": 4847 }, { "epoch": 5.948466257668712, "grad_norm": 0.33141180872917175, "learning_rate": 4.1645294105655376e-05, "loss": 0.6723737120628357, "step": 4848 }, { "epoch": 5.949693251533742, "grad_norm": 0.27905285358428955, "learning_rate": 4.164154887573783e-05, "loss": 0.5906966924667358, "step": 4849 }, { "epoch": 5.950920245398773, "grad_norm": 0.22732530534267426, "learning_rate": 4.163780297504693e-05, "loss": 0.8041701912879944, "step": 4850 }, { "epoch": 5.9521472392638035, "grad_norm": 0.32461512088775635, "learning_rate": 4.1634056403733634e-05, "loss": 0.676436185836792, "step": 4851 }, { "epoch": 5.953374233128835, "grad_norm": 0.30753785371780396, "learning_rate": 4.163030916194897e-05, "loss": 0.6911284923553467, "step": 4852 }, { "epoch": 5.954601226993865, "grad_norm": 0.21836969256401062, "learning_rate": 4.1626561249843974e-05, "loss": 0.8233866691589355, "step": 4853 }, { "epoch": 5.955828220858896, "grad_norm": 0.24521461129188538, "learning_rate": 4.1622812667569715e-05, "loss": 0.7283881306648254, "step": 4854 }, { "epoch": 5.957055214723926, "grad_norm": 0.22089971601963043, "learning_rate": 4.1619063415277284e-05, "loss": 0.7424062490463257, "step": 4855 }, { "epoch": 5.958282208588957, "grad_norm": 0.25845709443092346, "learning_rate": 4.161531349311781e-05, "loss": 0.856850266456604, "step": 4856 }, { "epoch": 5.9595092024539875, "grad_norm": 0.27923133969306946, "learning_rate": 4.161156290124244e-05, "loss": 0.7754940986633301, "step": 4857 }, { "epoch": 5.960736196319019, "grad_norm": 0.3168666660785675, "learning_rate": 4.160781163980234e-05, "loss": 0.8582627773284912, "step": 4858 }, { "epoch": 5.961963190184049, "grad_norm": 0.2285081297159195, "learning_rate": 4.160405970894872e-05, "loss": 0.877805233001709, "step": 4859 }, { "epoch": 5.96319018404908, "grad_norm": 0.2292224019765854, "learning_rate": 4.160030710883281e-05, "loss": 0.8331773281097412, "step": 4860 }, { "epoch": 5.96441717791411, "grad_norm": 0.2554776668548584, "learning_rate": 4.159655383960587e-05, "loss": 0.6886364817619324, "step": 4861 }, { "epoch": 5.965644171779141, "grad_norm": 0.2600645422935486, "learning_rate": 4.1592799901419174e-05, "loss": 0.7265744209289551, "step": 4862 }, { "epoch": 5.9668711656441715, "grad_norm": 0.20997925102710724, "learning_rate": 4.1589045294424045e-05, "loss": 0.9353628158569336, "step": 4863 }, { "epoch": 5.968098159509203, "grad_norm": 0.2567557692527771, "learning_rate": 4.158529001877181e-05, "loss": 0.712440550327301, "step": 4864 }, { "epoch": 5.969325153374233, "grad_norm": 0.30492541193962097, "learning_rate": 4.158153407461384e-05, "loss": 0.618342399597168, "step": 4865 }, { "epoch": 5.970552147239264, "grad_norm": 0.2556605041027069, "learning_rate": 4.157777746210152e-05, "loss": 0.7644118666648865, "step": 4866 }, { "epoch": 5.971779141104294, "grad_norm": 0.22472849488258362, "learning_rate": 4.157402018138627e-05, "loss": 0.8371933698654175, "step": 4867 }, { "epoch": 5.973006134969325, "grad_norm": 0.3307766616344452, "learning_rate": 4.157026223261954e-05, "loss": 0.7033059000968933, "step": 4868 }, { "epoch": 5.9742331288343555, "grad_norm": 0.2781734764575958, "learning_rate": 4.15665036159528e-05, "loss": 0.6501166820526123, "step": 4869 }, { "epoch": 5.975460122699387, "grad_norm": 0.2428225427865982, "learning_rate": 4.156274433153755e-05, "loss": 0.6827386617660522, "step": 4870 }, { "epoch": 5.976687116564417, "grad_norm": 0.24000854790210724, "learning_rate": 4.15589843795253e-05, "loss": 0.7016529440879822, "step": 4871 }, { "epoch": 5.977914110429448, "grad_norm": 0.27764710783958435, "learning_rate": 4.155522376006763e-05, "loss": 0.6556406021118164, "step": 4872 }, { "epoch": 5.979141104294478, "grad_norm": 0.2330889254808426, "learning_rate": 4.155146247331611e-05, "loss": 0.9013257026672363, "step": 4873 }, { "epoch": 5.980368098159509, "grad_norm": 0.2413112074136734, "learning_rate": 4.154770051942234e-05, "loss": 0.9881879091262817, "step": 4874 }, { "epoch": 5.9815950920245395, "grad_norm": 0.20725201070308685, "learning_rate": 4.154393789853795e-05, "loss": 0.7812767028808594, "step": 4875 }, { "epoch": 5.982822085889571, "grad_norm": 0.28786739706993103, "learning_rate": 4.1540174610814616e-05, "loss": 0.6117138862609863, "step": 4876 }, { "epoch": 5.984049079754601, "grad_norm": 0.20901517570018768, "learning_rate": 4.153641065640402e-05, "loss": 0.8470836877822876, "step": 4877 }, { "epoch": 5.985276073619632, "grad_norm": 0.22869329154491425, "learning_rate": 4.153264603545788e-05, "loss": 0.732661247253418, "step": 4878 }, { "epoch": 5.986503067484662, "grad_norm": 0.21166007220745087, "learning_rate": 4.152888074812792e-05, "loss": 0.8795293569564819, "step": 4879 }, { "epoch": 5.987730061349693, "grad_norm": 0.37854626774787903, "learning_rate": 4.152511479456592e-05, "loss": 0.5390317440032959, "step": 4880 }, { "epoch": 5.9889570552147235, "grad_norm": 0.26539182662963867, "learning_rate": 4.152134817492368e-05, "loss": 0.6074455976486206, "step": 4881 }, { "epoch": 5.990184049079755, "grad_norm": 0.2493753731250763, "learning_rate": 4.1517580889353006e-05, "loss": 0.826339602470398, "step": 4882 }, { "epoch": 5.991411042944785, "grad_norm": 0.2545546889305115, "learning_rate": 4.151381293800575e-05, "loss": 0.6630108952522278, "step": 4883 }, { "epoch": 5.992638036809816, "grad_norm": 0.25069287419319153, "learning_rate": 4.15100443210338e-05, "loss": 0.8002126812934875, "step": 4884 }, { "epoch": 5.993865030674847, "grad_norm": 0.2676278352737427, "learning_rate": 4.150627503858906e-05, "loss": 0.7305550575256348, "step": 4885 }, { "epoch": 5.995092024539877, "grad_norm": 0.28580784797668457, "learning_rate": 4.1502505090823445e-05, "loss": 0.7055286169052124, "step": 4886 }, { "epoch": 5.9963190184049076, "grad_norm": 0.21631094813346863, "learning_rate": 4.1498734477888924e-05, "loss": 0.8239426612854004, "step": 4887 }, { "epoch": 5.997546012269939, "grad_norm": 0.2457408905029297, "learning_rate": 4.149496319993746e-05, "loss": 0.7516425848007202, "step": 4888 }, { "epoch": 5.99877300613497, "grad_norm": 0.2711232304573059, "learning_rate": 4.1491191257121075e-05, "loss": 0.7092045545578003, "step": 4889 }, { "epoch": 6.0, "grad_norm": 0.23765087127685547, "learning_rate": 4.148741864959181e-05, "loss": 0.7522321939468384, "step": 4890 }, { "epoch": 6.001226993865031, "grad_norm": 0.18203361332416534, "learning_rate": 4.148364537750172e-05, "loss": 0.8116693496704102, "step": 4891 }, { "epoch": 6.002453987730061, "grad_norm": 0.3730311095714569, "learning_rate": 4.14798714410029e-05, "loss": 0.5749507546424866, "step": 4892 }, { "epoch": 6.0036809815950924, "grad_norm": 0.2701694369316101, "learning_rate": 4.147609684024747e-05, "loss": 0.7231317758560181, "step": 4893 }, { "epoch": 6.004907975460123, "grad_norm": 0.22997036576271057, "learning_rate": 4.147232157538756e-05, "loss": 0.63939368724823, "step": 4894 }, { "epoch": 6.006134969325154, "grad_norm": 0.2501993775367737, "learning_rate": 4.1468545646575344e-05, "loss": 0.7494592666625977, "step": 4895 }, { "epoch": 6.007361963190184, "grad_norm": 0.2520042657852173, "learning_rate": 4.146476905396303e-05, "loss": 0.6915676593780518, "step": 4896 }, { "epoch": 6.008588957055215, "grad_norm": 0.2287764996290207, "learning_rate": 4.146099179770283e-05, "loss": 0.6443613171577454, "step": 4897 }, { "epoch": 6.009815950920245, "grad_norm": 0.2028621882200241, "learning_rate": 4.145721387794701e-05, "loss": 0.8153035640716553, "step": 4898 }, { "epoch": 6.0110429447852765, "grad_norm": 0.23689600825309753, "learning_rate": 4.145343529484782e-05, "loss": 0.830392599105835, "step": 4899 }, { "epoch": 6.012269938650307, "grad_norm": 0.2183445543050766, "learning_rate": 4.144965604855759e-05, "loss": 0.9124890565872192, "step": 4900 }, { "epoch": 6.013496932515338, "grad_norm": 0.23996856808662415, "learning_rate": 4.144587613922863e-05, "loss": 0.688453197479248, "step": 4901 }, { "epoch": 6.014723926380368, "grad_norm": 0.2677255868911743, "learning_rate": 4.144209556701333e-05, "loss": 0.7142581939697266, "step": 4902 }, { "epoch": 6.015950920245399, "grad_norm": 0.22557181119918823, "learning_rate": 4.143831433206403e-05, "loss": 0.7310566902160645, "step": 4903 }, { "epoch": 6.017177914110429, "grad_norm": 0.2694237530231476, "learning_rate": 4.1434532434533166e-05, "loss": 0.6058180332183838, "step": 4904 }, { "epoch": 6.0184049079754605, "grad_norm": 0.24259968101978302, "learning_rate": 4.1430749874573184e-05, "loss": 0.729343593120575, "step": 4905 }, { "epoch": 6.019631901840491, "grad_norm": 0.25931230187416077, "learning_rate": 4.142696665233653e-05, "loss": 0.5646173357963562, "step": 4906 }, { "epoch": 6.020858895705522, "grad_norm": 0.2971172332763672, "learning_rate": 4.14231827679757e-05, "loss": 0.689173698425293, "step": 4907 }, { "epoch": 6.022085889570552, "grad_norm": 0.20937426388263702, "learning_rate": 4.141939822164321e-05, "loss": 0.8421022891998291, "step": 4908 }, { "epoch": 6.023312883435583, "grad_norm": 0.2917129397392273, "learning_rate": 4.1415613013491615e-05, "loss": 0.7380911111831665, "step": 4909 }, { "epoch": 6.024539877300613, "grad_norm": 0.26453715562820435, "learning_rate": 4.1411827143673474e-05, "loss": 0.7768704891204834, "step": 4910 }, { "epoch": 6.0257668711656445, "grad_norm": 0.2568628787994385, "learning_rate": 4.140804061234139e-05, "loss": 0.7645958662033081, "step": 4911 }, { "epoch": 6.026993865030675, "grad_norm": 0.28972071409225464, "learning_rate": 4.1404253419647983e-05, "loss": 0.43499457836151123, "step": 4912 }, { "epoch": 6.028220858895706, "grad_norm": 0.21905680000782013, "learning_rate": 4.140046556574591e-05, "loss": 0.7613681554794312, "step": 4913 }, { "epoch": 6.029447852760736, "grad_norm": 0.22171138226985931, "learning_rate": 4.1396677050787845e-05, "loss": 0.7973269820213318, "step": 4914 }, { "epoch": 6.030674846625767, "grad_norm": 0.22772325575351715, "learning_rate": 4.1392887874926494e-05, "loss": 0.8842129707336426, "step": 4915 }, { "epoch": 6.031901840490797, "grad_norm": 0.35117802023887634, "learning_rate": 4.138909803831458e-05, "loss": 0.5075373649597168, "step": 4916 }, { "epoch": 6.0331288343558285, "grad_norm": 0.23417362570762634, "learning_rate": 4.138530754110488e-05, "loss": 0.8230351209640503, "step": 4917 }, { "epoch": 6.034355828220859, "grad_norm": 0.2511827349662781, "learning_rate": 4.1381516383450156e-05, "loss": 0.7702747583389282, "step": 4918 }, { "epoch": 6.03558282208589, "grad_norm": 0.23086008429527283, "learning_rate": 4.137772456550323e-05, "loss": 0.7932368516921997, "step": 4919 }, { "epoch": 6.03680981595092, "grad_norm": 0.22765202820301056, "learning_rate": 4.137393208741693e-05, "loss": 0.6548484563827515, "step": 4920 }, { "epoch": 6.038036809815951, "grad_norm": 0.2581900656223297, "learning_rate": 4.137013894934414e-05, "loss": 0.754426121711731, "step": 4921 }, { "epoch": 6.039263803680981, "grad_norm": 0.2681885063648224, "learning_rate": 4.1366345151437735e-05, "loss": 0.603723406791687, "step": 4922 }, { "epoch": 6.0404907975460125, "grad_norm": 0.24848195910453796, "learning_rate": 4.136255069385063e-05, "loss": 0.816351592540741, "step": 4923 }, { "epoch": 6.041717791411043, "grad_norm": 0.3413611054420471, "learning_rate": 4.135875557673578e-05, "loss": 0.48665133118629456, "step": 4924 }, { "epoch": 6.042944785276074, "grad_norm": 0.33307668566703796, "learning_rate": 4.135495980024615e-05, "loss": 0.5421522259712219, "step": 4925 }, { "epoch": 6.044171779141104, "grad_norm": 0.26657912135124207, "learning_rate": 4.135116336453473e-05, "loss": 0.7976627349853516, "step": 4926 }, { "epoch": 6.045398773006135, "grad_norm": 0.21635869145393372, "learning_rate": 4.134736626975455e-05, "loss": 0.785549521446228, "step": 4927 }, { "epoch": 6.046625766871165, "grad_norm": 0.27870863676071167, "learning_rate": 4.134356851605866e-05, "loss": 0.530827522277832, "step": 4928 }, { "epoch": 6.0478527607361965, "grad_norm": 0.22276216745376587, "learning_rate": 4.1339770103600144e-05, "loss": 0.6993886232376099, "step": 4929 }, { "epoch": 6.049079754601227, "grad_norm": 0.22546331584453583, "learning_rate": 4.1335971032532095e-05, "loss": 0.8935530185699463, "step": 4930 }, { "epoch": 6.050306748466258, "grad_norm": 0.23117752373218536, "learning_rate": 4.133217130300764e-05, "loss": 0.8537427186965942, "step": 4931 }, { "epoch": 6.051533742331288, "grad_norm": 0.25498294830322266, "learning_rate": 4.1328370915179946e-05, "loss": 0.7234341502189636, "step": 4932 }, { "epoch": 6.052760736196319, "grad_norm": 0.2675793468952179, "learning_rate": 4.132456986920219e-05, "loss": 0.5502943992614746, "step": 4933 }, { "epoch": 6.053987730061349, "grad_norm": 0.28558632731437683, "learning_rate": 4.132076816522758e-05, "loss": 0.5559278726577759, "step": 4934 }, { "epoch": 6.0552147239263805, "grad_norm": 0.2318490445613861, "learning_rate": 4.131696580340935e-05, "loss": 0.7666807174682617, "step": 4935 }, { "epoch": 6.056441717791411, "grad_norm": 0.2533893287181854, "learning_rate": 4.1313162783900775e-05, "loss": 0.6706361770629883, "step": 4936 }, { "epoch": 6.057668711656442, "grad_norm": 0.223909392952919, "learning_rate": 4.1309359106855126e-05, "loss": 0.705473780632019, "step": 4937 }, { "epoch": 6.058895705521472, "grad_norm": 0.2709785997867584, "learning_rate": 4.1305554772425723e-05, "loss": 0.7426307797431946, "step": 4938 }, { "epoch": 6.060122699386503, "grad_norm": 0.2643663287162781, "learning_rate": 4.130174978076592e-05, "loss": 0.8356521129608154, "step": 4939 }, { "epoch": 6.061349693251533, "grad_norm": 0.23674403131008148, "learning_rate": 4.1297944132029075e-05, "loss": 0.6575706005096436, "step": 4940 }, { "epoch": 6.0625766871165645, "grad_norm": 0.21483290195465088, "learning_rate": 4.129413782636859e-05, "loss": 0.8597002625465393, "step": 4941 }, { "epoch": 6.063803680981595, "grad_norm": 0.31269627809524536, "learning_rate": 4.129033086393788e-05, "loss": 0.8323042392730713, "step": 4942 }, { "epoch": 6.065030674846626, "grad_norm": 0.2688845992088318, "learning_rate": 4.128652324489039e-05, "loss": 0.7245582342147827, "step": 4943 }, { "epoch": 6.066257668711656, "grad_norm": 0.298125684261322, "learning_rate": 4.12827149693796e-05, "loss": 0.5101884603500366, "step": 4944 }, { "epoch": 6.067484662576687, "grad_norm": 0.20744340121746063, "learning_rate": 4.127890603755902e-05, "loss": 0.8504332900047302, "step": 4945 }, { "epoch": 6.068711656441717, "grad_norm": 0.2727969288825989, "learning_rate": 4.1275096449582154e-05, "loss": 0.6522331237792969, "step": 4946 }, { "epoch": 6.0699386503067485, "grad_norm": 0.22564905881881714, "learning_rate": 4.1271286205602565e-05, "loss": 0.696049690246582, "step": 4947 }, { "epoch": 6.071165644171779, "grad_norm": 0.24335268139839172, "learning_rate": 4.126747530577385e-05, "loss": 0.7765539288520813, "step": 4948 }, { "epoch": 6.07239263803681, "grad_norm": 0.23228013515472412, "learning_rate": 4.126366375024959e-05, "loss": 0.8902037143707275, "step": 4949 }, { "epoch": 6.07361963190184, "grad_norm": 0.23613430559635162, "learning_rate": 4.125985153918344e-05, "loss": 0.7525965571403503, "step": 4950 }, { "epoch": 6.074846625766871, "grad_norm": 0.24303236603736877, "learning_rate": 4.1256038672729034e-05, "loss": 0.7607641220092773, "step": 4951 }, { "epoch": 6.076073619631902, "grad_norm": 0.21085362136363983, "learning_rate": 4.125222515104008e-05, "loss": 0.7320361137390137, "step": 4952 }, { "epoch": 6.0773006134969325, "grad_norm": 0.266781747341156, "learning_rate": 4.124841097427029e-05, "loss": 0.5852984189987183, "step": 4953 }, { "epoch": 6.078527607361964, "grad_norm": 0.2822149693965912, "learning_rate": 4.124459614257339e-05, "loss": 0.7329820394515991, "step": 4954 }, { "epoch": 6.079754601226994, "grad_norm": 0.2375662922859192, "learning_rate": 4.124078065610315e-05, "loss": 0.7538645267486572, "step": 4955 }, { "epoch": 6.080981595092025, "grad_norm": 0.22656501829624176, "learning_rate": 4.1236964515013366e-05, "loss": 0.8077552318572998, "step": 4956 }, { "epoch": 6.082208588957055, "grad_norm": 0.2683943808078766, "learning_rate": 4.1233147719457844e-05, "loss": 0.694171667098999, "step": 4957 }, { "epoch": 6.083435582822086, "grad_norm": 0.21352969110012054, "learning_rate": 4.122933026959044e-05, "loss": 0.7666447162628174, "step": 4958 }, { "epoch": 6.0846625766871165, "grad_norm": 0.27984923124313354, "learning_rate": 4.1225512165565024e-05, "loss": 0.8159143924713135, "step": 4959 }, { "epoch": 6.085889570552148, "grad_norm": 0.22968511283397675, "learning_rate": 4.1221693407535475e-05, "loss": 0.8182836174964905, "step": 4960 }, { "epoch": 6.087116564417178, "grad_norm": 0.27547991275787354, "learning_rate": 4.1217873995655744e-05, "loss": 0.5828187465667725, "step": 4961 }, { "epoch": 6.088343558282209, "grad_norm": 0.27501311898231506, "learning_rate": 4.1214053930079764e-05, "loss": 0.7605500817298889, "step": 4962 }, { "epoch": 6.089570552147239, "grad_norm": 0.333322137594223, "learning_rate": 4.121023321096152e-05, "loss": 0.641416609287262, "step": 4963 }, { "epoch": 6.09079754601227, "grad_norm": 0.25518372654914856, "learning_rate": 4.1206411838455e-05, "loss": 0.7856305837631226, "step": 4964 }, { "epoch": 6.0920245398773005, "grad_norm": 0.23744727671146393, "learning_rate": 4.120258981271424e-05, "loss": 0.7757415771484375, "step": 4965 }, { "epoch": 6.093251533742332, "grad_norm": 0.21946333348751068, "learning_rate": 4.11987671338933e-05, "loss": 0.7692431211471558, "step": 4966 }, { "epoch": 6.094478527607362, "grad_norm": 0.25338345766067505, "learning_rate": 4.119494380214626e-05, "loss": 0.7052517533302307, "step": 4967 }, { "epoch": 6.095705521472393, "grad_norm": 0.24751605093479156, "learning_rate": 4.119111981762722e-05, "loss": 0.6465108394622803, "step": 4968 }, { "epoch": 6.096932515337423, "grad_norm": 0.29373833537101746, "learning_rate": 4.118729518049032e-05, "loss": 0.733720064163208, "step": 4969 }, { "epoch": 6.098159509202454, "grad_norm": 0.24620676040649414, "learning_rate": 4.118346989088972e-05, "loss": 0.7373461723327637, "step": 4970 }, { "epoch": 6.0993865030674845, "grad_norm": 0.2463599145412445, "learning_rate": 4.11796439489796e-05, "loss": 0.8306390047073364, "step": 4971 }, { "epoch": 6.100613496932516, "grad_norm": 0.2638508081436157, "learning_rate": 4.1175817354914184e-05, "loss": 0.5746856927871704, "step": 4972 }, { "epoch": 6.101840490797546, "grad_norm": 0.230902761220932, "learning_rate": 4.11719901088477e-05, "loss": 0.8345330953598022, "step": 4973 }, { "epoch": 6.103067484662577, "grad_norm": 0.2567826211452484, "learning_rate": 4.1168162210934425e-05, "loss": 0.7482488751411438, "step": 4974 }, { "epoch": 6.104294478527607, "grad_norm": 0.2854488790035248, "learning_rate": 4.116433366132865e-05, "loss": 0.6336814165115356, "step": 4975 }, { "epoch": 6.105521472392638, "grad_norm": 0.3390377163887024, "learning_rate": 4.116050446018467e-05, "loss": 0.6382855176925659, "step": 4976 }, { "epoch": 6.1067484662576685, "grad_norm": 0.29517680406570435, "learning_rate": 4.1156674607656856e-05, "loss": 0.5224463939666748, "step": 4977 }, { "epoch": 6.1079754601227, "grad_norm": 0.26675906777381897, "learning_rate": 4.1152844103899566e-05, "loss": 0.7615743279457092, "step": 4978 }, { "epoch": 6.10920245398773, "grad_norm": 0.2718263864517212, "learning_rate": 4.114901294906721e-05, "loss": 0.7503135204315186, "step": 4979 }, { "epoch": 6.110429447852761, "grad_norm": 0.2991044223308563, "learning_rate": 4.1145181143314195e-05, "loss": 0.5842733383178711, "step": 4980 }, { "epoch": 6.111656441717791, "grad_norm": 0.2380835860967636, "learning_rate": 4.1141348686794964e-05, "loss": 0.632466197013855, "step": 4981 }, { "epoch": 6.112883435582822, "grad_norm": 0.20910577476024628, "learning_rate": 4.113751557966401e-05, "loss": 0.7653254866600037, "step": 4982 }, { "epoch": 6.1141104294478525, "grad_norm": 0.22128444910049438, "learning_rate": 4.113368182207583e-05, "loss": 0.8188170194625854, "step": 4983 }, { "epoch": 6.115337423312884, "grad_norm": 0.25844651460647583, "learning_rate": 4.112984741418495e-05, "loss": 0.664648175239563, "step": 4984 }, { "epoch": 6.116564417177914, "grad_norm": 0.264396071434021, "learning_rate": 4.112601235614593e-05, "loss": 0.7198718190193176, "step": 4985 }, { "epoch": 6.117791411042945, "grad_norm": 0.2743094563484192, "learning_rate": 4.112217664811334e-05, "loss": 0.7096498012542725, "step": 4986 }, { "epoch": 6.119018404907975, "grad_norm": 0.24934764206409454, "learning_rate": 4.1118340290241794e-05, "loss": 0.7127863168716431, "step": 4987 }, { "epoch": 6.120245398773006, "grad_norm": 0.19340673089027405, "learning_rate": 4.1114503282685925e-05, "loss": 0.8349015116691589, "step": 4988 }, { "epoch": 6.1214723926380366, "grad_norm": 0.21065913140773773, "learning_rate": 4.111066562560038e-05, "loss": 0.6509546041488647, "step": 4989 }, { "epoch": 6.122699386503068, "grad_norm": 0.27880436182022095, "learning_rate": 4.1106827319139854e-05, "loss": 0.7083147168159485, "step": 4990 }, { "epoch": 6.123926380368098, "grad_norm": 0.31197577714920044, "learning_rate": 4.110298836345906e-05, "loss": 0.8883185386657715, "step": 4991 }, { "epoch": 6.125153374233129, "grad_norm": 0.2958407998085022, "learning_rate": 4.109914875871273e-05, "loss": 0.7400007843971252, "step": 4992 }, { "epoch": 6.126380368098159, "grad_norm": 0.2818317711353302, "learning_rate": 4.109530850505563e-05, "loss": 0.6054843664169312, "step": 4993 }, { "epoch": 6.12760736196319, "grad_norm": 0.29367583990097046, "learning_rate": 4.109146760264255e-05, "loss": 0.7565825581550598, "step": 4994 }, { "epoch": 6.128834355828221, "grad_norm": 0.24861857295036316, "learning_rate": 4.1087626051628305e-05, "loss": 0.6567690372467041, "step": 4995 }, { "epoch": 6.130061349693252, "grad_norm": 0.2547912895679474, "learning_rate": 4.1083783852167744e-05, "loss": 0.8263748288154602, "step": 4996 }, { "epoch": 6.131288343558282, "grad_norm": 0.24758997559547424, "learning_rate": 4.107994100441572e-05, "loss": 0.7693375945091248, "step": 4997 }, { "epoch": 6.132515337423313, "grad_norm": 0.24427226185798645, "learning_rate": 4.107609750852714e-05, "loss": 0.7747365236282349, "step": 4998 }, { "epoch": 6.133742331288343, "grad_norm": 0.23891130089759827, "learning_rate": 4.107225336465692e-05, "loss": 0.6981843709945679, "step": 4999 }, { "epoch": 6.134969325153374, "grad_norm": 0.27443790435791016, "learning_rate": 4.1068408572960004e-05, "loss": 0.6200233697891235, "step": 5000 }, { "epoch": 6.136196319018405, "grad_norm": 0.26703235507011414, "learning_rate": 4.1064563133591365e-05, "loss": 0.5118700265884399, "step": 5001 }, { "epoch": 6.137423312883436, "grad_norm": 0.20964550971984863, "learning_rate": 4.106071704670601e-05, "loss": 0.6438997983932495, "step": 5002 }, { "epoch": 6.138650306748466, "grad_norm": 0.2943136394023895, "learning_rate": 4.105687031245895e-05, "loss": 0.6547777652740479, "step": 5003 }, { "epoch": 6.139877300613497, "grad_norm": 0.23474006354808807, "learning_rate": 4.105302293100525e-05, "loss": 0.7480200529098511, "step": 5004 }, { "epoch": 6.141104294478527, "grad_norm": 0.2758606970310211, "learning_rate": 4.1049174902499974e-05, "loss": 0.6279100179672241, "step": 5005 }, { "epoch": 6.142331288343558, "grad_norm": 0.273654580116272, "learning_rate": 4.1045326227098237e-05, "loss": 0.6856517195701599, "step": 5006 }, { "epoch": 6.143558282208589, "grad_norm": 0.2091088891029358, "learning_rate": 4.104147690495516e-05, "loss": 0.7248075008392334, "step": 5007 }, { "epoch": 6.14478527607362, "grad_norm": 0.2333425134420395, "learning_rate": 4.1037626936225895e-05, "loss": 0.7723984718322754, "step": 5008 }, { "epoch": 6.14601226993865, "grad_norm": 0.2936597466468811, "learning_rate": 4.103377632106564e-05, "loss": 0.557856559753418, "step": 5009 }, { "epoch": 6.147239263803681, "grad_norm": 0.25354525446891785, "learning_rate": 4.102992505962958e-05, "loss": 0.8638439178466797, "step": 5010 }, { "epoch": 6.148466257668711, "grad_norm": 0.22400924563407898, "learning_rate": 4.1026073152072964e-05, "loss": 0.8864084482192993, "step": 5011 }, { "epoch": 6.149693251533742, "grad_norm": 0.20541228353977203, "learning_rate": 4.102222059855105e-05, "loss": 0.8205896615982056, "step": 5012 }, { "epoch": 6.150920245398773, "grad_norm": 0.26373013854026794, "learning_rate": 4.101836739921912e-05, "loss": 0.7085107564926147, "step": 5013 }, { "epoch": 6.152147239263804, "grad_norm": 0.24392272531986237, "learning_rate": 4.101451355423249e-05, "loss": 0.7263004779815674, "step": 5014 }, { "epoch": 6.153374233128835, "grad_norm": 0.29083746671676636, "learning_rate": 4.1010659063746485e-05, "loss": 0.5955847501754761, "step": 5015 }, { "epoch": 6.154601226993865, "grad_norm": 0.23725157976150513, "learning_rate": 4.100680392791648e-05, "loss": 0.7794686555862427, "step": 5016 }, { "epoch": 6.155828220858895, "grad_norm": 0.2539575695991516, "learning_rate": 4.100294814689786e-05, "loss": 0.6986119747161865, "step": 5017 }, { "epoch": 6.157055214723926, "grad_norm": 0.2476261407136917, "learning_rate": 4.099909172084604e-05, "loss": 0.7141410112380981, "step": 5018 }, { "epoch": 6.1582822085889575, "grad_norm": 0.25903046131134033, "learning_rate": 4.099523464991647e-05, "loss": 0.7244953513145447, "step": 5019 }, { "epoch": 6.159509202453988, "grad_norm": 0.27797940373420715, "learning_rate": 4.099137693426461e-05, "loss": 0.5536707043647766, "step": 5020 }, { "epoch": 6.160736196319019, "grad_norm": 0.25296154618263245, "learning_rate": 4.0987518574045954e-05, "loss": 0.7040913105010986, "step": 5021 }, { "epoch": 6.161963190184049, "grad_norm": 0.2342815101146698, "learning_rate": 4.098365956941602e-05, "loss": 0.8583805561065674, "step": 5022 }, { "epoch": 6.16319018404908, "grad_norm": 0.27934160828590393, "learning_rate": 4.0979799920530356e-05, "loss": 0.7664904594421387, "step": 5023 }, { "epoch": 6.16441717791411, "grad_norm": 0.2777153551578522, "learning_rate": 4.0975939627544534e-05, "loss": 0.5576032996177673, "step": 5024 }, { "epoch": 6.1656441717791415, "grad_norm": 0.24082311987876892, "learning_rate": 4.097207869061415e-05, "loss": 0.7966017127037048, "step": 5025 }, { "epoch": 6.166871165644172, "grad_norm": 0.2908640205860138, "learning_rate": 4.096821710989483e-05, "loss": 0.7361832857131958, "step": 5026 }, { "epoch": 6.168098159509203, "grad_norm": 0.3264956772327423, "learning_rate": 4.0964354885542214e-05, "loss": 0.669710636138916, "step": 5027 }, { "epoch": 6.169325153374233, "grad_norm": 0.3162890076637268, "learning_rate": 4.0960492017711994e-05, "loss": 0.6152166128158569, "step": 5028 }, { "epoch": 6.170552147239264, "grad_norm": 0.24182914197444916, "learning_rate": 4.095662850655985e-05, "loss": 0.6730844974517822, "step": 5029 }, { "epoch": 6.171779141104294, "grad_norm": 0.23089194297790527, "learning_rate": 4.095276435224153e-05, "loss": 0.8537930250167847, "step": 5030 }, { "epoch": 6.1730061349693255, "grad_norm": 0.20777219533920288, "learning_rate": 4.094889955491278e-05, "loss": 0.9585174322128296, "step": 5031 }, { "epoch": 6.174233128834356, "grad_norm": 0.23611867427825928, "learning_rate": 4.094503411472937e-05, "loss": 0.7657182216644287, "step": 5032 }, { "epoch": 6.175460122699387, "grad_norm": 0.3009524643421173, "learning_rate": 4.094116803184711e-05, "loss": 0.6792210340499878, "step": 5033 }, { "epoch": 6.176687116564417, "grad_norm": 0.3088632822036743, "learning_rate": 4.093730130642184e-05, "loss": 0.5717151761054993, "step": 5034 }, { "epoch": 6.177914110429448, "grad_norm": 0.2949053943157196, "learning_rate": 4.09334339386094e-05, "loss": 0.7864640951156616, "step": 5035 }, { "epoch": 6.179141104294478, "grad_norm": 0.31195876002311707, "learning_rate": 4.092956592856569e-05, "loss": 0.6106685400009155, "step": 5036 }, { "epoch": 6.1803680981595095, "grad_norm": 0.2614648938179016, "learning_rate": 4.092569727644661e-05, "loss": 0.6727712750434875, "step": 5037 }, { "epoch": 6.18159509202454, "grad_norm": 0.28633013367652893, "learning_rate": 4.0921827982408076e-05, "loss": 0.6777546405792236, "step": 5038 }, { "epoch": 6.182822085889571, "grad_norm": 0.23242829740047455, "learning_rate": 4.0917958046606086e-05, "loss": 0.7586407661437988, "step": 5039 }, { "epoch": 6.184049079754601, "grad_norm": 0.246103897690773, "learning_rate": 4.091408746919661e-05, "loss": 0.5931556820869446, "step": 5040 }, { "epoch": 6.185276073619632, "grad_norm": 0.2126043438911438, "learning_rate": 4.0910216250335645e-05, "loss": 0.8959083557128906, "step": 5041 }, { "epoch": 6.186503067484662, "grad_norm": 0.2589772939682007, "learning_rate": 4.090634439017924e-05, "loss": 0.8372722268104553, "step": 5042 }, { "epoch": 6.1877300613496935, "grad_norm": 0.26290300488471985, "learning_rate": 4.090247188888347e-05, "loss": 0.573223352432251, "step": 5043 }, { "epoch": 6.188957055214724, "grad_norm": 0.25087347626686096, "learning_rate": 4.08985987466044e-05, "loss": 0.9167582392692566, "step": 5044 }, { "epoch": 6.190184049079755, "grad_norm": 0.2392032891511917, "learning_rate": 4.089472496349816e-05, "loss": 0.7892038822174072, "step": 5045 }, { "epoch": 6.191411042944785, "grad_norm": 0.294599324464798, "learning_rate": 4.089085053972089e-05, "loss": 0.6310349702835083, "step": 5046 }, { "epoch": 6.192638036809816, "grad_norm": 0.2020144909620285, "learning_rate": 4.0886975475428766e-05, "loss": 0.7643856406211853, "step": 5047 }, { "epoch": 6.193865030674846, "grad_norm": 0.2264660894870758, "learning_rate": 4.0883099770777965e-05, "loss": 0.7550011873245239, "step": 5048 }, { "epoch": 6.1950920245398775, "grad_norm": 0.3060932755470276, "learning_rate": 4.087922342592471e-05, "loss": 0.7191464304924011, "step": 5049 }, { "epoch": 6.196319018404908, "grad_norm": 0.3081960082054138, "learning_rate": 4.087534644102525e-05, "loss": 0.7170820236206055, "step": 5050 }, { "epoch": 6.197546012269939, "grad_norm": 0.2430078238248825, "learning_rate": 4.087146881623586e-05, "loss": 0.775449275970459, "step": 5051 }, { "epoch": 6.198773006134969, "grad_norm": 0.29831644892692566, "learning_rate": 4.086759055171282e-05, "loss": 0.6951881647109985, "step": 5052 }, { "epoch": 6.2, "grad_norm": 0.2252759486436844, "learning_rate": 4.086371164761247e-05, "loss": 0.7928915023803711, "step": 5053 }, { "epoch": 6.20122699386503, "grad_norm": 0.2681780159473419, "learning_rate": 4.085983210409114e-05, "loss": 0.6232633590698242, "step": 5054 }, { "epoch": 6.2024539877300615, "grad_norm": 0.2398170679807663, "learning_rate": 4.085595192130522e-05, "loss": 0.7964296340942383, "step": 5055 }, { "epoch": 6.203680981595092, "grad_norm": 0.23167967796325684, "learning_rate": 4.0852071099411096e-05, "loss": 0.7954854965209961, "step": 5056 }, { "epoch": 6.204907975460123, "grad_norm": 0.26835429668426514, "learning_rate": 4.0848189638565186e-05, "loss": 0.7713038325309753, "step": 5057 }, { "epoch": 6.206134969325153, "grad_norm": 0.19506807625293732, "learning_rate": 4.0844307538923964e-05, "loss": 0.7203356027603149, "step": 5058 }, { "epoch": 6.207361963190184, "grad_norm": 0.29955896735191345, "learning_rate": 4.0840424800643895e-05, "loss": 0.622001588344574, "step": 5059 }, { "epoch": 6.208588957055214, "grad_norm": 0.2766510546207428, "learning_rate": 4.083654142388147e-05, "loss": 0.7721490859985352, "step": 5060 }, { "epoch": 6.2098159509202455, "grad_norm": 0.4043380916118622, "learning_rate": 4.083265740879324e-05, "loss": 0.5264126062393188, "step": 5061 }, { "epoch": 6.211042944785276, "grad_norm": 0.2444452941417694, "learning_rate": 4.082877275553574e-05, "loss": 0.5939956307411194, "step": 5062 }, { "epoch": 6.212269938650307, "grad_norm": 0.26801881194114685, "learning_rate": 4.082488746426556e-05, "loss": 0.7257589101791382, "step": 5063 }, { "epoch": 6.213496932515337, "grad_norm": 0.29194176197052, "learning_rate": 4.082100153513929e-05, "loss": 0.7817088961601257, "step": 5064 }, { "epoch": 6.214723926380368, "grad_norm": 0.2716687321662903, "learning_rate": 4.081711496831359e-05, "loss": 0.7043301463127136, "step": 5065 }, { "epoch": 6.215950920245398, "grad_norm": 0.2858879864215851, "learning_rate": 4.0813227763945085e-05, "loss": 0.7361749410629272, "step": 5066 }, { "epoch": 6.2171779141104295, "grad_norm": 0.28310996294021606, "learning_rate": 4.080933992219047e-05, "loss": 0.6580299139022827, "step": 5067 }, { "epoch": 6.21840490797546, "grad_norm": 0.2783679664134979, "learning_rate": 4.0805451443206446e-05, "loss": 0.6581112146377563, "step": 5068 }, { "epoch": 6.219631901840491, "grad_norm": 0.21924246847629547, "learning_rate": 4.080156232714976e-05, "loss": 0.7822847366333008, "step": 5069 }, { "epoch": 6.220858895705521, "grad_norm": 0.29192084074020386, "learning_rate": 4.079767257417717e-05, "loss": 0.6020759344100952, "step": 5070 }, { "epoch": 6.222085889570552, "grad_norm": 0.22668150067329407, "learning_rate": 4.079378218444544e-05, "loss": 0.8278855085372925, "step": 5071 }, { "epoch": 6.223312883435582, "grad_norm": 0.29786574840545654, "learning_rate": 4.07898911581114e-05, "loss": 0.6953619718551636, "step": 5072 }, { "epoch": 6.2245398773006135, "grad_norm": 0.23679135739803314, "learning_rate": 4.0785999495331894e-05, "loss": 0.7490599751472473, "step": 5073 }, { "epoch": 6.225766871165644, "grad_norm": 0.2867676019668579, "learning_rate": 4.0782107196263764e-05, "loss": 0.5986911654472351, "step": 5074 }, { "epoch": 6.226993865030675, "grad_norm": 0.23632338643074036, "learning_rate": 4.0778214261063896e-05, "loss": 0.6913770437240601, "step": 5075 }, { "epoch": 6.228220858895705, "grad_norm": 0.35729435086250305, "learning_rate": 4.077432068988922e-05, "loss": 0.6052665710449219, "step": 5076 }, { "epoch": 6.229447852760736, "grad_norm": 0.21816319227218628, "learning_rate": 4.077042648289667e-05, "loss": 0.768839955329895, "step": 5077 }, { "epoch": 6.230674846625767, "grad_norm": 0.21795295178890228, "learning_rate": 4.0766531640243204e-05, "loss": 0.7646372318267822, "step": 5078 }, { "epoch": 6.2319018404907975, "grad_norm": 0.2640405595302582, "learning_rate": 4.076263616208581e-05, "loss": 0.6181118488311768, "step": 5079 }, { "epoch": 6.233128834355828, "grad_norm": 0.2487548440694809, "learning_rate": 4.0758740048581516e-05, "loss": 0.8600499033927917, "step": 5080 }, { "epoch": 6.234355828220859, "grad_norm": 0.2537027895450592, "learning_rate": 4.0754843299887355e-05, "loss": 0.8777683973312378, "step": 5081 }, { "epoch": 6.23558282208589, "grad_norm": 0.30106881260871887, "learning_rate": 4.075094591616039e-05, "loss": 0.7363409399986267, "step": 5082 }, { "epoch": 6.23680981595092, "grad_norm": 0.32139092683792114, "learning_rate": 4.074704789755772e-05, "loss": 0.6427978277206421, "step": 5083 }, { "epoch": 6.238036809815951, "grad_norm": 0.32965049147605896, "learning_rate": 4.074314924423647e-05, "loss": 0.5928045511245728, "step": 5084 }, { "epoch": 6.2392638036809815, "grad_norm": 0.3104090690612793, "learning_rate": 4.073924995635376e-05, "loss": 0.6975009441375732, "step": 5085 }, { "epoch": 6.240490797546013, "grad_norm": 0.23319292068481445, "learning_rate": 4.073535003406679e-05, "loss": 0.925682783126831, "step": 5086 }, { "epoch": 6.241717791411043, "grad_norm": 0.24994607269763947, "learning_rate": 4.073144947753272e-05, "loss": 0.7936456203460693, "step": 5087 }, { "epoch": 6.242944785276074, "grad_norm": 0.319470077753067, "learning_rate": 4.0727548286908806e-05, "loss": 0.5979012846946716, "step": 5088 }, { "epoch": 6.244171779141104, "grad_norm": 0.2675314247608185, "learning_rate": 4.0723646462352274e-05, "loss": 0.7973897457122803, "step": 5089 }, { "epoch": 6.245398773006135, "grad_norm": 0.29638585448265076, "learning_rate": 4.0719744004020394e-05, "loss": 0.7197015285491943, "step": 5090 }, { "epoch": 6.2466257668711656, "grad_norm": 0.2949064373970032, "learning_rate": 4.071584091207047e-05, "loss": 0.771479606628418, "step": 5091 }, { "epoch": 6.247852760736197, "grad_norm": 0.3181730806827545, "learning_rate": 4.071193718665982e-05, "loss": 0.6950725317001343, "step": 5092 }, { "epoch": 6.249079754601227, "grad_norm": 0.24523822963237762, "learning_rate": 4.070803282794578e-05, "loss": 0.8340487480163574, "step": 5093 }, { "epoch": 6.250306748466258, "grad_norm": 0.23830737173557281, "learning_rate": 4.070412783608576e-05, "loss": 0.5855191349983215, "step": 5094 }, { "epoch": 6.251533742331288, "grad_norm": 0.2256721705198288, "learning_rate": 4.070022221123713e-05, "loss": 0.7678561210632324, "step": 5095 }, { "epoch": 6.252760736196319, "grad_norm": 0.2592581510543823, "learning_rate": 4.0696315953557315e-05, "loss": 0.7170540690422058, "step": 5096 }, { "epoch": 6.25398773006135, "grad_norm": 0.35746657848358154, "learning_rate": 4.069240906320377e-05, "loss": 0.4237406849861145, "step": 5097 }, { "epoch": 6.255214723926381, "grad_norm": 0.2974379062652588, "learning_rate": 4.068850154033398e-05, "loss": 0.6016747355461121, "step": 5098 }, { "epoch": 6.256441717791411, "grad_norm": 0.2993706166744232, "learning_rate": 4.0684593385105435e-05, "loss": 0.43180859088897705, "step": 5099 }, { "epoch": 6.257668711656442, "grad_norm": 0.21848249435424805, "learning_rate": 4.0680684597675665e-05, "loss": 0.8157687187194824, "step": 5100 }, { "epoch": 6.258895705521472, "grad_norm": 0.23510652780532837, "learning_rate": 4.067677517820222e-05, "loss": 0.8477776646614075, "step": 5101 }, { "epoch": 6.260122699386503, "grad_norm": 0.2837962210178375, "learning_rate": 4.067286512684269e-05, "loss": 0.7462184429168701, "step": 5102 }, { "epoch": 6.261349693251534, "grad_norm": 0.23796658217906952, "learning_rate": 4.0668954443754646e-05, "loss": 0.7622103095054626, "step": 5103 }, { "epoch": 6.262576687116565, "grad_norm": 0.31910842657089233, "learning_rate": 4.0665043129095745e-05, "loss": 0.7095378041267395, "step": 5104 }, { "epoch": 6.263803680981595, "grad_norm": 0.2501404881477356, "learning_rate": 4.0661131183023634e-05, "loss": 0.8306862115859985, "step": 5105 }, { "epoch": 6.265030674846626, "grad_norm": 0.2573075592517853, "learning_rate": 4.0657218605696e-05, "loss": 0.7898120880126953, "step": 5106 }, { "epoch": 6.266257668711656, "grad_norm": 0.2811504900455475, "learning_rate": 4.0653305397270534e-05, "loss": 0.7107223868370056, "step": 5107 }, { "epoch": 6.267484662576687, "grad_norm": 0.23866309225559235, "learning_rate": 4.064939155790498e-05, "loss": 0.9142500758171082, "step": 5108 }, { "epoch": 6.268711656441718, "grad_norm": 0.24991180002689362, "learning_rate": 4.064547708775708e-05, "loss": 0.6929014921188354, "step": 5109 }, { "epoch": 6.269938650306749, "grad_norm": 0.2175208330154419, "learning_rate": 4.064156198698461e-05, "loss": 0.7984678745269775, "step": 5110 }, { "epoch": 6.271165644171779, "grad_norm": 0.2901901304721832, "learning_rate": 4.06376462557454e-05, "loss": 0.5624164342880249, "step": 5111 }, { "epoch": 6.27239263803681, "grad_norm": 0.36911913752555847, "learning_rate": 4.063372989419727e-05, "loss": 0.6838858127593994, "step": 5112 }, { "epoch": 6.27361963190184, "grad_norm": 0.2586519122123718, "learning_rate": 4.0629812902498075e-05, "loss": 0.6286542415618896, "step": 5113 }, { "epoch": 6.274846625766871, "grad_norm": 0.26288220286369324, "learning_rate": 4.06258952808057e-05, "loss": 0.5962402820587158, "step": 5114 }, { "epoch": 6.276073619631902, "grad_norm": 0.2667088210582733, "learning_rate": 4.062197702927805e-05, "loss": 0.5910913348197937, "step": 5115 }, { "epoch": 6.277300613496933, "grad_norm": 0.2312679886817932, "learning_rate": 4.061805814807306e-05, "loss": 0.6904770135879517, "step": 5116 }, { "epoch": 6.278527607361963, "grad_norm": 0.36499232053756714, "learning_rate": 4.0614138637348686e-05, "loss": 0.7207587361335754, "step": 5117 }, { "epoch": 6.279754601226994, "grad_norm": 0.3091434836387634, "learning_rate": 4.061021849726292e-05, "loss": 0.5683885812759399, "step": 5118 }, { "epoch": 6.280981595092024, "grad_norm": 0.2215614914894104, "learning_rate": 4.0606297727973776e-05, "loss": 0.8005905747413635, "step": 5119 }, { "epoch": 6.282208588957055, "grad_norm": 0.26518794894218445, "learning_rate": 4.0602376329639274e-05, "loss": 0.9250926971435547, "step": 5120 }, { "epoch": 6.283435582822086, "grad_norm": 0.22191333770751953, "learning_rate": 4.059845430241748e-05, "loss": 0.8297696709632874, "step": 5121 }, { "epoch": 6.284662576687117, "grad_norm": 0.33192992210388184, "learning_rate": 4.0594531646466486e-05, "loss": 0.5813419818878174, "step": 5122 }, { "epoch": 6.285889570552147, "grad_norm": 0.23173214495182037, "learning_rate": 4.05906083619444e-05, "loss": 0.6459895968437195, "step": 5123 }, { "epoch": 6.287116564417178, "grad_norm": 0.29544898867607117, "learning_rate": 4.058668444900935e-05, "loss": 0.8657174706459045, "step": 5124 }, { "epoch": 6.288343558282208, "grad_norm": 0.22943954169750214, "learning_rate": 4.058275990781951e-05, "loss": 0.6067493557929993, "step": 5125 }, { "epoch": 6.289570552147239, "grad_norm": 0.21732567250728607, "learning_rate": 4.057883473853305e-05, "loss": 0.8731178641319275, "step": 5126 }, { "epoch": 6.29079754601227, "grad_norm": 0.3120827376842499, "learning_rate": 4.057490894130821e-05, "loss": 0.6437867879867554, "step": 5127 }, { "epoch": 6.292024539877301, "grad_norm": 0.2666621804237366, "learning_rate": 4.05709825163032e-05, "loss": 0.5453451871871948, "step": 5128 }, { "epoch": 6.293251533742331, "grad_norm": 0.2321954220533371, "learning_rate": 4.056705546367631e-05, "loss": 0.7727856040000916, "step": 5129 }, { "epoch": 6.294478527607362, "grad_norm": 0.24470753967761993, "learning_rate": 4.0563127783585794e-05, "loss": 0.8220932483673096, "step": 5130 }, { "epoch": 6.295705521472392, "grad_norm": 0.2884701192378998, "learning_rate": 4.055919947619e-05, "loss": 0.64185631275177, "step": 5131 }, { "epoch": 6.296932515337423, "grad_norm": 0.2774549126625061, "learning_rate": 4.055527054164724e-05, "loss": 0.6716498136520386, "step": 5132 }, { "epoch": 6.298159509202454, "grad_norm": 0.2559015452861786, "learning_rate": 4.055134098011589e-05, "loss": 0.7837691307067871, "step": 5133 }, { "epoch": 6.299386503067485, "grad_norm": 0.2388256937265396, "learning_rate": 4.054741079175435e-05, "loss": 0.7446190118789673, "step": 5134 }, { "epoch": 6.300613496932515, "grad_norm": 0.30980628728866577, "learning_rate": 4.054347997672101e-05, "loss": 0.7865623235702515, "step": 5135 }, { "epoch": 6.301840490797546, "grad_norm": 0.26088011264801025, "learning_rate": 4.0539548535174334e-05, "loss": 0.7431743144989014, "step": 5136 }, { "epoch": 6.303067484662577, "grad_norm": 0.27154505252838135, "learning_rate": 4.0535616467272775e-05, "loss": 0.7025604248046875, "step": 5137 }, { "epoch": 6.304294478527607, "grad_norm": 0.2882933020591736, "learning_rate": 4.0531683773174815e-05, "loss": 0.628066897392273, "step": 5138 }, { "epoch": 6.305521472392638, "grad_norm": 0.23668646812438965, "learning_rate": 4.052775045303899e-05, "loss": 0.7646747827529907, "step": 5139 }, { "epoch": 6.306748466257669, "grad_norm": 0.21987923979759216, "learning_rate": 4.052381650702383e-05, "loss": 0.8568859696388245, "step": 5140 }, { "epoch": 6.3079754601227, "grad_norm": 0.30074596405029297, "learning_rate": 4.05198819352879e-05, "loss": 0.7724974155426025, "step": 5141 }, { "epoch": 6.30920245398773, "grad_norm": 0.2767634689807892, "learning_rate": 4.05159467379898e-05, "loss": 0.7186554670333862, "step": 5142 }, { "epoch": 6.31042944785276, "grad_norm": 0.26469287276268005, "learning_rate": 4.0512010915288124e-05, "loss": 0.7275973558425903, "step": 5143 }, { "epoch": 6.311656441717791, "grad_norm": 0.2540748119354248, "learning_rate": 4.050807446734154e-05, "loss": 0.6538571119308472, "step": 5144 }, { "epoch": 6.3128834355828225, "grad_norm": 0.26365527510643005, "learning_rate": 4.05041373943087e-05, "loss": 0.7863047122955322, "step": 5145 }, { "epoch": 6.314110429447853, "grad_norm": 0.2435222715139389, "learning_rate": 4.0500199696348306e-05, "loss": 0.7120739221572876, "step": 5146 }, { "epoch": 6.315337423312884, "grad_norm": 0.24021609127521515, "learning_rate": 4.049626137361907e-05, "loss": 0.8826924562454224, "step": 5147 }, { "epoch": 6.316564417177914, "grad_norm": 0.28473708033561707, "learning_rate": 4.049232242627974e-05, "loss": 0.563206672668457, "step": 5148 }, { "epoch": 6.317791411042945, "grad_norm": 0.2538278102874756, "learning_rate": 4.048838285448907e-05, "loss": 0.8040125370025635, "step": 5149 }, { "epoch": 6.319018404907975, "grad_norm": 0.2598775029182434, "learning_rate": 4.0484442658405855e-05, "loss": 0.7388876676559448, "step": 5150 }, { "epoch": 6.3202453987730065, "grad_norm": 0.19646021723747253, "learning_rate": 4.0480501838188937e-05, "loss": 0.7738609313964844, "step": 5151 }, { "epoch": 6.321472392638037, "grad_norm": 0.2551524341106415, "learning_rate": 4.047656039399713e-05, "loss": 0.8047677874565125, "step": 5152 }, { "epoch": 6.322699386503068, "grad_norm": 0.26998892426490784, "learning_rate": 4.047261832598931e-05, "loss": 0.48095816373825073, "step": 5153 }, { "epoch": 6.323926380368098, "grad_norm": 0.26020875573158264, "learning_rate": 4.046867563432438e-05, "loss": 0.864093542098999, "step": 5154 }, { "epoch": 6.325153374233129, "grad_norm": 0.2292812466621399, "learning_rate": 4.0464732319161255e-05, "loss": 0.703270435333252, "step": 5155 }, { "epoch": 6.326380368098159, "grad_norm": 0.3188858926296234, "learning_rate": 4.0460788380658874e-05, "loss": 0.6838997602462769, "step": 5156 }, { "epoch": 6.3276073619631905, "grad_norm": 0.28426089882850647, "learning_rate": 4.045684381897622e-05, "loss": 0.6347602605819702, "step": 5157 }, { "epoch": 6.328834355828221, "grad_norm": 0.38151103258132935, "learning_rate": 4.0452898634272265e-05, "loss": 0.4827333688735962, "step": 5158 }, { "epoch": 6.330061349693252, "grad_norm": 0.9081712365150452, "learning_rate": 4.044895282670604e-05, "loss": 0.7669130563735962, "step": 5159 }, { "epoch": 6.331288343558282, "grad_norm": 0.23036448657512665, "learning_rate": 4.0445006396436586e-05, "loss": 0.77034592628479, "step": 5160 }, { "epoch": 6.332515337423313, "grad_norm": 0.22733283042907715, "learning_rate": 4.044105934362298e-05, "loss": 0.7522379755973816, "step": 5161 }, { "epoch": 6.333742331288343, "grad_norm": 0.22501173615455627, "learning_rate": 4.043711166842431e-05, "loss": 0.9326243996620178, "step": 5162 }, { "epoch": 6.3349693251533745, "grad_norm": 0.24582231044769287, "learning_rate": 4.0433163370999696e-05, "loss": 0.7099176645278931, "step": 5163 }, { "epoch": 6.336196319018405, "grad_norm": 0.25239166617393494, "learning_rate": 4.042921445150828e-05, "loss": 0.7731028199195862, "step": 5164 }, { "epoch": 6.337423312883436, "grad_norm": 0.22336333990097046, "learning_rate": 4.0425264910109237e-05, "loss": 0.6651943922042847, "step": 5165 }, { "epoch": 6.338650306748466, "grad_norm": 0.2612956464290619, "learning_rate": 4.042131474696177e-05, "loss": 0.8087329864501953, "step": 5166 }, { "epoch": 6.339877300613497, "grad_norm": 0.3275611698627472, "learning_rate": 4.0417363962225073e-05, "loss": 0.738402783870697, "step": 5167 }, { "epoch": 6.341104294478527, "grad_norm": 0.22586889564990997, "learning_rate": 4.0413412556058415e-05, "loss": 0.7709367275238037, "step": 5168 }, { "epoch": 6.3423312883435585, "grad_norm": 0.25050657987594604, "learning_rate": 4.0409460528621055e-05, "loss": 0.6613065600395203, "step": 5169 }, { "epoch": 6.343558282208589, "grad_norm": 0.2221677452325821, "learning_rate": 4.040550788007229e-05, "loss": 0.6484233140945435, "step": 5170 }, { "epoch": 6.34478527607362, "grad_norm": 0.2648455500602722, "learning_rate": 4.040155461057143e-05, "loss": 0.8105323314666748, "step": 5171 }, { "epoch": 6.34601226993865, "grad_norm": 0.230443075299263, "learning_rate": 4.039760072027785e-05, "loss": 0.7421295046806335, "step": 5172 }, { "epoch": 6.347239263803681, "grad_norm": 0.23937460780143738, "learning_rate": 4.0393646209350886e-05, "loss": 0.8245289325714111, "step": 5173 }, { "epoch": 6.348466257668711, "grad_norm": 0.24263422191143036, "learning_rate": 4.0389691077949955e-05, "loss": 0.8034491539001465, "step": 5174 }, { "epoch": 6.3496932515337425, "grad_norm": 0.342013955116272, "learning_rate": 4.0385735326234466e-05, "loss": 0.6505697965621948, "step": 5175 }, { "epoch": 6.350920245398773, "grad_norm": 0.24243222177028656, "learning_rate": 4.0381778954363867e-05, "loss": 0.718284010887146, "step": 5176 }, { "epoch": 6.352147239263804, "grad_norm": 0.25043195486068726, "learning_rate": 4.0377821962497635e-05, "loss": 0.645535945892334, "step": 5177 }, { "epoch": 6.353374233128834, "grad_norm": 0.23554404079914093, "learning_rate": 4.037386435079525e-05, "loss": 0.7030754089355469, "step": 5178 }, { "epoch": 6.354601226993865, "grad_norm": 0.2845529019832611, "learning_rate": 4.036990611941625e-05, "loss": 0.7741358280181885, "step": 5179 }, { "epoch": 6.355828220858895, "grad_norm": 0.2502102553844452, "learning_rate": 4.0365947268520165e-05, "loss": 0.623693585395813, "step": 5180 }, { "epoch": 6.3570552147239265, "grad_norm": 0.2558685541152954, "learning_rate": 4.0361987798266565e-05, "loss": 0.7391117811203003, "step": 5181 }, { "epoch": 6.358282208588957, "grad_norm": 0.24517722427845, "learning_rate": 4.035802770881506e-05, "loss": 0.7841464877128601, "step": 5182 }, { "epoch": 6.359509202453988, "grad_norm": 0.30093279480934143, "learning_rate": 4.035406700032526e-05, "loss": 0.7152426242828369, "step": 5183 }, { "epoch": 6.360736196319018, "grad_norm": 0.28298792243003845, "learning_rate": 4.03501056729568e-05, "loss": 0.47542983293533325, "step": 5184 }, { "epoch": 6.361963190184049, "grad_norm": 0.2590138614177704, "learning_rate": 4.0346143726869364e-05, "loss": 0.780259907245636, "step": 5185 }, { "epoch": 6.363190184049079, "grad_norm": 0.22500890493392944, "learning_rate": 4.034218116222265e-05, "loss": 0.7859333753585815, "step": 5186 }, { "epoch": 6.3644171779141105, "grad_norm": 0.24486799538135529, "learning_rate": 4.033821797917637e-05, "loss": 0.8325762748718262, "step": 5187 }, { "epoch": 6.365644171779141, "grad_norm": 0.32821282744407654, "learning_rate": 4.033425417789026e-05, "loss": 0.5502490401268005, "step": 5188 }, { "epoch": 6.366871165644172, "grad_norm": 0.2303633987903595, "learning_rate": 4.033028975852411e-05, "loss": 0.6606165766716003, "step": 5189 }, { "epoch": 6.368098159509202, "grad_norm": 0.2614404261112213, "learning_rate": 4.03263247212377e-05, "loss": 0.6320115327835083, "step": 5190 }, { "epoch": 6.369325153374233, "grad_norm": 0.2688010036945343, "learning_rate": 4.0322359066190854e-05, "loss": 0.7719141244888306, "step": 5191 }, { "epoch": 6.370552147239263, "grad_norm": 0.25764790177345276, "learning_rate": 4.03183927935434e-05, "loss": 0.8137650489807129, "step": 5192 }, { "epoch": 6.3717791411042946, "grad_norm": 0.35349398851394653, "learning_rate": 4.031442590345523e-05, "loss": 0.7170403599739075, "step": 5193 }, { "epoch": 6.373006134969325, "grad_norm": 0.23487353324890137, "learning_rate": 4.031045839608624e-05, "loss": 0.7240995168685913, "step": 5194 }, { "epoch": 6.374233128834356, "grad_norm": 0.2679123282432556, "learning_rate": 4.0306490271596334e-05, "loss": 0.6153730154037476, "step": 5195 }, { "epoch": 6.375460122699386, "grad_norm": 0.223740816116333, "learning_rate": 4.030252153014545e-05, "loss": 0.7148211002349854, "step": 5196 }, { "epoch": 6.376687116564417, "grad_norm": 0.2980267405509949, "learning_rate": 4.0298552171893576e-05, "loss": 0.695189356803894, "step": 5197 }, { "epoch": 6.3779141104294474, "grad_norm": 0.2504207491874695, "learning_rate": 4.02945821970007e-05, "loss": 0.746281623840332, "step": 5198 }, { "epoch": 6.379141104294479, "grad_norm": 0.286589652299881, "learning_rate": 4.0290611605626836e-05, "loss": 0.7122493386268616, "step": 5199 }, { "epoch": 6.38036809815951, "grad_norm": 0.34866979718208313, "learning_rate": 4.028664039793203e-05, "loss": 0.6125848293304443, "step": 5200 }, { "epoch": 6.38159509202454, "grad_norm": 0.24537529051303864, "learning_rate": 4.028266857407634e-05, "loss": 0.6982302665710449, "step": 5201 }, { "epoch": 6.38282208588957, "grad_norm": 0.2616350054740906, "learning_rate": 4.027869613421987e-05, "loss": 0.5432535409927368, "step": 5202 }, { "epoch": 6.384049079754601, "grad_norm": 0.2920369803905487, "learning_rate": 4.0274723078522745e-05, "loss": 0.7385891675949097, "step": 5203 }, { "epoch": 6.385276073619632, "grad_norm": 0.24408791959285736, "learning_rate": 4.027074940714508e-05, "loss": 0.8094451427459717, "step": 5204 }, { "epoch": 6.386503067484663, "grad_norm": 0.21701543033123016, "learning_rate": 4.026677512024708e-05, "loss": 0.6896264553070068, "step": 5205 }, { "epoch": 6.387730061349693, "grad_norm": 0.30096301436424255, "learning_rate": 4.02628002179889e-05, "loss": 0.7540544271469116, "step": 5206 }, { "epoch": 6.388957055214724, "grad_norm": 0.3012639284133911, "learning_rate": 4.0258824700530787e-05, "loss": 0.7898674011230469, "step": 5207 }, { "epoch": 6.390184049079755, "grad_norm": 0.22049656510353088, "learning_rate": 4.025484856803296e-05, "loss": 0.7283636927604675, "step": 5208 }, { "epoch": 6.391411042944785, "grad_norm": 0.26545825600624084, "learning_rate": 4.02508718206557e-05, "loss": 0.6525203585624695, "step": 5209 }, { "epoch": 6.392638036809816, "grad_norm": 0.33609870076179504, "learning_rate": 4.024689445855929e-05, "loss": 0.602336049079895, "step": 5210 }, { "epoch": 6.393865030674847, "grad_norm": 0.2765834629535675, "learning_rate": 4.024291648190405e-05, "loss": 0.7022169828414917, "step": 5211 }, { "epoch": 6.395092024539878, "grad_norm": 0.21761204302310944, "learning_rate": 4.023893789085033e-05, "loss": 0.5907342433929443, "step": 5212 }, { "epoch": 6.396319018404908, "grad_norm": 0.20439524948596954, "learning_rate": 4.023495868555848e-05, "loss": 0.9836783409118652, "step": 5213 }, { "epoch": 6.397546012269939, "grad_norm": 0.26282018423080444, "learning_rate": 4.02309788661889e-05, "loss": 0.6808322072029114, "step": 5214 }, { "epoch": 6.398773006134969, "grad_norm": 0.28590863943099976, "learning_rate": 4.022699843290199e-05, "loss": 0.7739900946617126, "step": 5215 }, { "epoch": 6.4, "grad_norm": 0.2258654236793518, "learning_rate": 4.0223017385858206e-05, "loss": 0.898026704788208, "step": 5216 }, { "epoch": 6.401226993865031, "grad_norm": 0.29610344767570496, "learning_rate": 4.021903572521802e-05, "loss": 0.8236136436462402, "step": 5217 }, { "epoch": 6.402453987730062, "grad_norm": 0.2594669461250305, "learning_rate": 4.02150534511419e-05, "loss": 0.728367805480957, "step": 5218 }, { "epoch": 6.403680981595092, "grad_norm": 0.2069224715232849, "learning_rate": 4.021107056379038e-05, "loss": 0.7994474172592163, "step": 5219 }, { "epoch": 6.404907975460123, "grad_norm": 0.22521547973155975, "learning_rate": 4.020708706332398e-05, "loss": 0.6937153339385986, "step": 5220 }, { "epoch": 6.406134969325153, "grad_norm": 0.24053451418876648, "learning_rate": 4.020310294990327e-05, "loss": 0.7782799005508423, "step": 5221 }, { "epoch": 6.407361963190184, "grad_norm": 0.25348517298698425, "learning_rate": 4.019911822368885e-05, "loss": 0.5724071860313416, "step": 5222 }, { "epoch": 6.408588957055215, "grad_norm": 0.25708529353141785, "learning_rate": 4.019513288484131e-05, "loss": 0.8262026906013489, "step": 5223 }, { "epoch": 6.409815950920246, "grad_norm": 0.24843433499336243, "learning_rate": 4.0191146933521314e-05, "loss": 0.774420976638794, "step": 5224 }, { "epoch": 6.411042944785276, "grad_norm": 0.2221628874540329, "learning_rate": 4.018716036988951e-05, "loss": 0.7390725612640381, "step": 5225 }, { "epoch": 6.412269938650307, "grad_norm": 0.413463830947876, "learning_rate": 4.018317319410658e-05, "loss": 0.5861309766769409, "step": 5226 }, { "epoch": 6.413496932515337, "grad_norm": 0.27810192108154297, "learning_rate": 4.017918540633325e-05, "loss": 0.6494849920272827, "step": 5227 }, { "epoch": 6.414723926380368, "grad_norm": 0.29554441571235657, "learning_rate": 4.0175197006730246e-05, "loss": 0.7665032744407654, "step": 5228 }, { "epoch": 6.415950920245399, "grad_norm": 0.3287356197834015, "learning_rate": 4.0171207995458326e-05, "loss": 0.47358381748199463, "step": 5229 }, { "epoch": 6.41717791411043, "grad_norm": 0.19300775229930878, "learning_rate": 4.0167218372678295e-05, "loss": 0.8121306300163269, "step": 5230 }, { "epoch": 6.41840490797546, "grad_norm": 0.23014181852340698, "learning_rate": 4.016322813855093e-05, "loss": 0.7630796432495117, "step": 5231 }, { "epoch": 6.419631901840491, "grad_norm": 0.32453882694244385, "learning_rate": 4.015923729323711e-05, "loss": 0.6787066459655762, "step": 5232 }, { "epoch": 6.420858895705521, "grad_norm": 0.2238764762878418, "learning_rate": 4.015524583689765e-05, "loss": 0.8655859231948853, "step": 5233 }, { "epoch": 6.422085889570552, "grad_norm": 0.2553533613681793, "learning_rate": 4.015125376969347e-05, "loss": 0.7783976197242737, "step": 5234 }, { "epoch": 6.423312883435583, "grad_norm": 0.23490971326828003, "learning_rate": 4.0147261091785447e-05, "loss": 1.0318522453308105, "step": 5235 }, { "epoch": 6.424539877300614, "grad_norm": 0.276302695274353, "learning_rate": 4.014326780333454e-05, "loss": 0.681302547454834, "step": 5236 }, { "epoch": 6.425766871165644, "grad_norm": 0.22140167653560638, "learning_rate": 4.0139273904501697e-05, "loss": 0.8133601546287537, "step": 5237 }, { "epoch": 6.426993865030675, "grad_norm": 0.3032333552837372, "learning_rate": 4.01352793954479e-05, "loss": 0.6655465364456177, "step": 5238 }, { "epoch": 6.428220858895705, "grad_norm": 0.24559976160526276, "learning_rate": 4.0131284276334154e-05, "loss": 0.6633833646774292, "step": 5239 }, { "epoch": 6.429447852760736, "grad_norm": 0.23770873248577118, "learning_rate": 4.01272885473215e-05, "loss": 0.8936383724212646, "step": 5240 }, { "epoch": 6.430674846625767, "grad_norm": 0.27989980578422546, "learning_rate": 4.012329220857098e-05, "loss": 0.645986795425415, "step": 5241 }, { "epoch": 6.431901840490798, "grad_norm": 0.2938571274280548, "learning_rate": 4.01192952602437e-05, "loss": 0.6313410997390747, "step": 5242 }, { "epoch": 6.433128834355828, "grad_norm": 0.30939483642578125, "learning_rate": 4.0115297702500744e-05, "loss": 0.6572954058647156, "step": 5243 }, { "epoch": 6.434355828220859, "grad_norm": 0.2443540096282959, "learning_rate": 4.011129953550324e-05, "loss": 0.8604369163513184, "step": 5244 }, { "epoch": 6.435582822085889, "grad_norm": 0.3377685546875, "learning_rate": 4.010730075941236e-05, "loss": 0.7038878202438354, "step": 5245 }, { "epoch": 6.43680981595092, "grad_norm": 0.23764607310295105, "learning_rate": 4.010330137438927e-05, "loss": 0.7552645206451416, "step": 5246 }, { "epoch": 6.438036809815951, "grad_norm": 0.270214706659317, "learning_rate": 4.009930138059518e-05, "loss": 0.6854848265647888, "step": 5247 }, { "epoch": 6.439263803680982, "grad_norm": 0.29430311918258667, "learning_rate": 4.009530077819131e-05, "loss": 0.6054472923278809, "step": 5248 }, { "epoch": 6.440490797546012, "grad_norm": 0.24526716768741608, "learning_rate": 4.009129956733892e-05, "loss": 0.7039870023727417, "step": 5249 }, { "epoch": 6.441717791411043, "grad_norm": 0.2435961663722992, "learning_rate": 4.008729774819929e-05, "loss": 0.6753126978874207, "step": 5250 }, { "epoch": 6.442944785276073, "grad_norm": 0.2547881305217743, "learning_rate": 4.008329532093371e-05, "loss": 0.7894333600997925, "step": 5251 }, { "epoch": 6.444171779141104, "grad_norm": 0.25736698508262634, "learning_rate": 4.007929228570352e-05, "loss": 0.8459957838058472, "step": 5252 }, { "epoch": 6.445398773006135, "grad_norm": 0.26824748516082764, "learning_rate": 4.007528864267007e-05, "loss": 0.8008431196212769, "step": 5253 }, { "epoch": 6.446625766871166, "grad_norm": 0.27586260437965393, "learning_rate": 4.007128439199473e-05, "loss": 0.716568112373352, "step": 5254 }, { "epoch": 6.447852760736196, "grad_norm": 0.27035975456237793, "learning_rate": 4.0067279533838894e-05, "loss": 0.6513561606407166, "step": 5255 }, { "epoch": 6.449079754601227, "grad_norm": 0.2569769024848938, "learning_rate": 4.0063274068364e-05, "loss": 0.7524026036262512, "step": 5256 }, { "epoch": 6.450306748466257, "grad_norm": 0.27261969447135925, "learning_rate": 4.0059267995731486e-05, "loss": 0.6526316404342651, "step": 5257 }, { "epoch": 6.451533742331288, "grad_norm": 0.2575017511844635, "learning_rate": 4.005526131610283e-05, "loss": 0.7264602780342102, "step": 5258 }, { "epoch": 6.452760736196319, "grad_norm": 0.24902412295341492, "learning_rate": 4.0051254029639534e-05, "loss": 0.6891529560089111, "step": 5259 }, { "epoch": 6.45398773006135, "grad_norm": 0.20485606789588928, "learning_rate": 4.004724613650311e-05, "loss": 0.8405108451843262, "step": 5260 }, { "epoch": 6.45521472392638, "grad_norm": 0.23501144349575043, "learning_rate": 4.0043237636855116e-05, "loss": 0.7784343361854553, "step": 5261 }, { "epoch": 6.456441717791411, "grad_norm": 0.2566266655921936, "learning_rate": 4.003922853085711e-05, "loss": 0.6885643005371094, "step": 5262 }, { "epoch": 6.457668711656442, "grad_norm": 0.2715698182582855, "learning_rate": 4.0035218818670695e-05, "loss": 0.5985674858093262, "step": 5263 }, { "epoch": 6.458895705521472, "grad_norm": 0.36087167263031006, "learning_rate": 4.0031208500457496e-05, "loss": 0.572766125202179, "step": 5264 }, { "epoch": 6.460122699386503, "grad_norm": 0.24967621266841888, "learning_rate": 4.002719757637915e-05, "loss": 0.7181203961372375, "step": 5265 }, { "epoch": 6.461349693251534, "grad_norm": 0.2014106661081314, "learning_rate": 4.002318604659733e-05, "loss": 0.7980003356933594, "step": 5266 }, { "epoch": 6.462576687116565, "grad_norm": 0.29955434799194336, "learning_rate": 4.001917391127373e-05, "loss": 0.6328787803649902, "step": 5267 }, { "epoch": 6.463803680981595, "grad_norm": 0.2707365155220032, "learning_rate": 4.001516117057006e-05, "loss": 0.8412461280822754, "step": 5268 }, { "epoch": 6.465030674846625, "grad_norm": 0.2735099494457245, "learning_rate": 4.001114782464808e-05, "loss": 0.8338009119033813, "step": 5269 }, { "epoch": 6.466257668711656, "grad_norm": 0.2265314757823944, "learning_rate": 4.0007133873669534e-05, "loss": 0.764600396156311, "step": 5270 }, { "epoch": 6.4674846625766875, "grad_norm": 0.2566446363925934, "learning_rate": 4.000311931779622e-05, "loss": 0.8104768991470337, "step": 5271 }, { "epoch": 6.468711656441718, "grad_norm": 0.23339512944221497, "learning_rate": 3.999910415718996e-05, "loss": 0.952476978302002, "step": 5272 }, { "epoch": 6.469938650306749, "grad_norm": 0.23027707636356354, "learning_rate": 3.9995088392012594e-05, "loss": 0.5283755660057068, "step": 5273 }, { "epoch": 6.471165644171779, "grad_norm": 0.26270055770874023, "learning_rate": 3.999107202242598e-05, "loss": 0.7889226675033569, "step": 5274 }, { "epoch": 6.47239263803681, "grad_norm": 0.26336923241615295, "learning_rate": 3.9987055048592e-05, "loss": 0.8061090707778931, "step": 5275 }, { "epoch": 6.47361963190184, "grad_norm": 0.3297610580921173, "learning_rate": 3.998303747067259e-05, "loss": 0.7729164958000183, "step": 5276 }, { "epoch": 6.4748466257668715, "grad_norm": 0.264135479927063, "learning_rate": 3.9979019288829666e-05, "loss": 0.7055637836456299, "step": 5277 }, { "epoch": 6.476073619631902, "grad_norm": 0.2844482362270355, "learning_rate": 3.9975000503225204e-05, "loss": 0.644700825214386, "step": 5278 }, { "epoch": 6.477300613496933, "grad_norm": 0.2833509147167206, "learning_rate": 3.997098111402117e-05, "loss": 0.6940724849700928, "step": 5279 }, { "epoch": 6.478527607361963, "grad_norm": 0.27066507935523987, "learning_rate": 3.996696112137959e-05, "loss": 0.6816269159317017, "step": 5280 }, { "epoch": 6.479754601226994, "grad_norm": 0.22718939185142517, "learning_rate": 3.996294052546249e-05, "loss": 0.7303286790847778, "step": 5281 }, { "epoch": 6.480981595092024, "grad_norm": 0.29046669602394104, "learning_rate": 3.995891932643194e-05, "loss": 0.6168336272239685, "step": 5282 }, { "epoch": 6.4822085889570555, "grad_norm": 0.27057111263275146, "learning_rate": 3.9954897524450015e-05, "loss": 0.8025234937667847, "step": 5283 }, { "epoch": 6.483435582822086, "grad_norm": 0.31053483486175537, "learning_rate": 3.995087511967882e-05, "loss": 0.5556389093399048, "step": 5284 }, { "epoch": 6.484662576687117, "grad_norm": 0.2972628176212311, "learning_rate": 3.994685211228049e-05, "loss": 0.6597254276275635, "step": 5285 }, { "epoch": 6.485889570552147, "grad_norm": 0.3055121600627899, "learning_rate": 3.994282850241718e-05, "loss": 0.6348087787628174, "step": 5286 }, { "epoch": 6.487116564417178, "grad_norm": 0.26777395606040955, "learning_rate": 3.993880429025108e-05, "loss": 0.7614477276802063, "step": 5287 }, { "epoch": 6.488343558282208, "grad_norm": 0.25427085161209106, "learning_rate": 3.993477947594438e-05, "loss": 0.6434071063995361, "step": 5288 }, { "epoch": 6.4895705521472395, "grad_norm": 0.2057809829711914, "learning_rate": 3.993075405965932e-05, "loss": 0.8944029211997986, "step": 5289 }, { "epoch": 6.49079754601227, "grad_norm": 0.24747344851493835, "learning_rate": 3.9926728041558135e-05, "loss": 0.7687223553657532, "step": 5290 }, { "epoch": 6.492024539877301, "grad_norm": 0.2666730582714081, "learning_rate": 3.9922701421803135e-05, "loss": 0.8790172338485718, "step": 5291 }, { "epoch": 6.493251533742331, "grad_norm": 0.2817303538322449, "learning_rate": 3.991867420055658e-05, "loss": 0.5621828436851501, "step": 5292 }, { "epoch": 6.494478527607362, "grad_norm": 0.30118852853775024, "learning_rate": 3.991464637798083e-05, "loss": 0.6922107934951782, "step": 5293 }, { "epoch": 6.495705521472392, "grad_norm": 0.2610633373260498, "learning_rate": 3.991061795423822e-05, "loss": 0.7645841836929321, "step": 5294 }, { "epoch": 6.4969325153374236, "grad_norm": 0.22063866257667542, "learning_rate": 3.9906588929491126e-05, "loss": 0.6204993724822998, "step": 5295 }, { "epoch": 6.498159509202454, "grad_norm": 0.25159940123558044, "learning_rate": 3.9902559303901944e-05, "loss": 0.7026278972625732, "step": 5296 }, { "epoch": 6.499386503067485, "grad_norm": 0.3010179400444031, "learning_rate": 3.989852907763311e-05, "loss": 0.827976405620575, "step": 5297 }, { "epoch": 6.500613496932515, "grad_norm": 0.2883239686489105, "learning_rate": 3.989449825084704e-05, "loss": 0.8523415923118591, "step": 5298 }, { "epoch": 6.501840490797546, "grad_norm": 0.28448402881622314, "learning_rate": 3.989046682370625e-05, "loss": 0.8203685283660889, "step": 5299 }, { "epoch": 6.5030674846625764, "grad_norm": 0.2513897120952606, "learning_rate": 3.98864347963732e-05, "loss": 0.7078511714935303, "step": 5300 }, { "epoch": 6.504294478527608, "grad_norm": 0.26240256428718567, "learning_rate": 3.9882402169010424e-05, "loss": 0.8418930768966675, "step": 5301 }, { "epoch": 6.505521472392638, "grad_norm": 0.2550401985645294, "learning_rate": 3.987836894178046e-05, "loss": 0.6458543539047241, "step": 5302 }, { "epoch": 6.506748466257669, "grad_norm": 0.2791892886161804, "learning_rate": 3.987433511484588e-05, "loss": 0.7503255605697632, "step": 5303 }, { "epoch": 6.507975460122699, "grad_norm": 0.2176334410905838, "learning_rate": 3.987030068836927e-05, "loss": 0.8292560577392578, "step": 5304 }, { "epoch": 6.50920245398773, "grad_norm": 0.24061381816864014, "learning_rate": 3.986626566251325e-05, "loss": 0.8369452357292175, "step": 5305 }, { "epoch": 6.5104294478527605, "grad_norm": 0.284036248922348, "learning_rate": 3.986223003744047e-05, "loss": 0.6708962917327881, "step": 5306 }, { "epoch": 6.511656441717792, "grad_norm": 0.27586251497268677, "learning_rate": 3.985819381331358e-05, "loss": 0.7451338171958923, "step": 5307 }, { "epoch": 6.512883435582822, "grad_norm": 0.19368930160999298, "learning_rate": 3.985415699029527e-05, "loss": 0.8072869777679443, "step": 5308 }, { "epoch": 6.514110429447853, "grad_norm": 0.2694646120071411, "learning_rate": 3.985011956854826e-05, "loss": 0.7063475251197815, "step": 5309 }, { "epoch": 6.515337423312883, "grad_norm": 0.2254197895526886, "learning_rate": 3.984608154823529e-05, "loss": 0.734917938709259, "step": 5310 }, { "epoch": 6.516564417177914, "grad_norm": 0.23232606053352356, "learning_rate": 3.9842042929519114e-05, "loss": 0.7044807076454163, "step": 5311 }, { "epoch": 6.5177914110429445, "grad_norm": 0.24039825797080994, "learning_rate": 3.983800371256252e-05, "loss": 0.8465197086334229, "step": 5312 }, { "epoch": 6.519018404907976, "grad_norm": 0.3713136315345764, "learning_rate": 3.9833963897528305e-05, "loss": 0.6720627546310425, "step": 5313 }, { "epoch": 6.520245398773006, "grad_norm": 0.27688926458358765, "learning_rate": 3.982992348457932e-05, "loss": 0.5904400944709778, "step": 5314 }, { "epoch": 6.521472392638037, "grad_norm": 0.24866560101509094, "learning_rate": 3.9825882473878415e-05, "loss": 0.6659249067306519, "step": 5315 }, { "epoch": 6.522699386503067, "grad_norm": 0.23497149348258972, "learning_rate": 3.9821840865588466e-05, "loss": 0.7726842164993286, "step": 5316 }, { "epoch": 6.523926380368098, "grad_norm": 0.2590956687927246, "learning_rate": 3.9817798659872396e-05, "loss": 0.8432793617248535, "step": 5317 }, { "epoch": 6.5251533742331285, "grad_norm": 0.24585238099098206, "learning_rate": 3.981375585689312e-05, "loss": 0.8336986303329468, "step": 5318 }, { "epoch": 6.52638036809816, "grad_norm": 0.22862325608730316, "learning_rate": 3.980971245681359e-05, "loss": 0.7701413035392761, "step": 5319 }, { "epoch": 6.52760736196319, "grad_norm": 0.2125592827796936, "learning_rate": 3.9805668459796795e-05, "loss": 0.7033641934394836, "step": 5320 }, { "epoch": 6.528834355828221, "grad_norm": 0.23154208064079285, "learning_rate": 3.980162386600573e-05, "loss": 0.8254093527793884, "step": 5321 }, { "epoch": 6.530061349693252, "grad_norm": 0.28850820660591125, "learning_rate": 3.9797578675603426e-05, "loss": 0.6202366352081299, "step": 5322 }, { "epoch": 6.531288343558282, "grad_norm": 0.26407697796821594, "learning_rate": 3.9793532888752923e-05, "loss": 0.6991504430770874, "step": 5323 }, { "epoch": 6.5325153374233125, "grad_norm": 0.20277413725852966, "learning_rate": 3.978948650561731e-05, "loss": 0.8203493356704712, "step": 5324 }, { "epoch": 6.533742331288344, "grad_norm": 0.23208798468112946, "learning_rate": 3.978543952635967e-05, "loss": 0.7501944899559021, "step": 5325 }, { "epoch": 6.534969325153375, "grad_norm": 0.25271546840667725, "learning_rate": 3.978139195114313e-05, "loss": 0.5944037437438965, "step": 5326 }, { "epoch": 6.536196319018405, "grad_norm": 0.34049728512763977, "learning_rate": 3.9777343780130836e-05, "loss": 0.713326096534729, "step": 5327 }, { "epoch": 6.537423312883435, "grad_norm": 0.3022862672805786, "learning_rate": 3.977329501348597e-05, "loss": 0.768928587436676, "step": 5328 }, { "epoch": 6.538650306748466, "grad_norm": 0.26723602414131165, "learning_rate": 3.976924565137172e-05, "loss": 0.9138541221618652, "step": 5329 }, { "epoch": 6.539877300613497, "grad_norm": 0.2202179729938507, "learning_rate": 3.9765195693951295e-05, "loss": 0.8951178789138794, "step": 5330 }, { "epoch": 6.541104294478528, "grad_norm": 0.22206631302833557, "learning_rate": 3.9761145141387937e-05, "loss": 0.723139762878418, "step": 5331 }, { "epoch": 6.542331288343558, "grad_norm": 0.2352093756198883, "learning_rate": 3.9757093993844926e-05, "loss": 0.9899389743804932, "step": 5332 }, { "epoch": 6.543558282208589, "grad_norm": 0.28643882274627686, "learning_rate": 3.9753042251485544e-05, "loss": 0.7652402520179749, "step": 5333 }, { "epoch": 6.54478527607362, "grad_norm": 0.22752569615840912, "learning_rate": 3.9748989914473106e-05, "loss": 0.8429549932479858, "step": 5334 }, { "epoch": 6.54601226993865, "grad_norm": 0.2970803678035736, "learning_rate": 3.9744936982970945e-05, "loss": 0.5949256420135498, "step": 5335 }, { "epoch": 6.5472392638036805, "grad_norm": 0.28159958124160767, "learning_rate": 3.974088345714244e-05, "loss": 0.8972992300987244, "step": 5336 }, { "epoch": 6.548466257668712, "grad_norm": 0.2534463703632355, "learning_rate": 3.973682933715095e-05, "loss": 0.8109191060066223, "step": 5337 }, { "epoch": 6.549693251533743, "grad_norm": 0.24577020108699799, "learning_rate": 3.9732774623159906e-05, "loss": 0.7832570672035217, "step": 5338 }, { "epoch": 6.550920245398773, "grad_norm": 0.25141674280166626, "learning_rate": 3.972871931533274e-05, "loss": 0.925796389579773, "step": 5339 }, { "epoch": 6.552147239263804, "grad_norm": 0.29386717081069946, "learning_rate": 3.97246634138329e-05, "loss": 0.8159717321395874, "step": 5340 }, { "epoch": 6.553374233128834, "grad_norm": 0.291042685508728, "learning_rate": 3.972060691882388e-05, "loss": 0.42956167459487915, "step": 5341 }, { "epoch": 6.554601226993865, "grad_norm": 0.2623380124568939, "learning_rate": 3.9716549830469176e-05, "loss": 0.6424541473388672, "step": 5342 }, { "epoch": 6.555828220858896, "grad_norm": 0.21274958550930023, "learning_rate": 3.971249214893232e-05, "loss": 0.7373268604278564, "step": 5343 }, { "epoch": 6.557055214723927, "grad_norm": 0.22982798516750336, "learning_rate": 3.970843387437687e-05, "loss": 0.707874596118927, "step": 5344 }, { "epoch": 6.558282208588957, "grad_norm": 0.22690799832344055, "learning_rate": 3.9704375006966396e-05, "loss": 0.8271693587303162, "step": 5345 }, { "epoch": 6.559509202453988, "grad_norm": 0.2511795163154602, "learning_rate": 3.970031554686451e-05, "loss": 0.7751701474189758, "step": 5346 }, { "epoch": 6.560736196319018, "grad_norm": 0.2221159040927887, "learning_rate": 3.969625549423483e-05, "loss": 0.860637903213501, "step": 5347 }, { "epoch": 6.561963190184049, "grad_norm": 0.2127687633037567, "learning_rate": 3.9692194849241e-05, "loss": 0.7896760106086731, "step": 5348 }, { "epoch": 6.56319018404908, "grad_norm": 0.2849212884902954, "learning_rate": 3.968813361204671e-05, "loss": 0.7116686701774597, "step": 5349 }, { "epoch": 6.564417177914111, "grad_norm": 0.3184572458267212, "learning_rate": 3.9684071782815635e-05, "loss": 0.5981433391571045, "step": 5350 }, { "epoch": 6.565644171779141, "grad_norm": 0.2357543259859085, "learning_rate": 3.968000936171151e-05, "loss": 0.6885003447532654, "step": 5351 }, { "epoch": 6.566871165644172, "grad_norm": 0.2680750787258148, "learning_rate": 3.967594634889807e-05, "loss": 0.5520545840263367, "step": 5352 }, { "epoch": 6.568098159509202, "grad_norm": 0.24463383853435516, "learning_rate": 3.96718827445391e-05, "loss": 0.7015460729598999, "step": 5353 }, { "epoch": 6.569325153374233, "grad_norm": 0.31223705410957336, "learning_rate": 3.966781854879838e-05, "loss": 0.47989925742149353, "step": 5354 }, { "epoch": 6.570552147239264, "grad_norm": 0.26062509417533875, "learning_rate": 3.966375376183973e-05, "loss": 0.9123284220695496, "step": 5355 }, { "epoch": 6.571779141104295, "grad_norm": 0.24442647397518158, "learning_rate": 3.9659688383826986e-05, "loss": 0.7514827847480774, "step": 5356 }, { "epoch": 6.573006134969325, "grad_norm": 0.2346338927745819, "learning_rate": 3.965562241492401e-05, "loss": 0.7448673248291016, "step": 5357 }, { "epoch": 6.574233128834356, "grad_norm": 0.28635555505752563, "learning_rate": 3.96515558552947e-05, "loss": 0.5938111543655396, "step": 5358 }, { "epoch": 6.575460122699386, "grad_norm": 0.2603766918182373, "learning_rate": 3.964748870510295e-05, "loss": 0.7105481624603271, "step": 5359 }, { "epoch": 6.576687116564417, "grad_norm": 0.24784843623638153, "learning_rate": 3.964342096451272e-05, "loss": 0.6427799463272095, "step": 5360 }, { "epoch": 6.577914110429448, "grad_norm": 0.2631615698337555, "learning_rate": 3.963935263368794e-05, "loss": 0.5554043054580688, "step": 5361 }, { "epoch": 6.579141104294479, "grad_norm": 0.24139656126499176, "learning_rate": 3.963528371279263e-05, "loss": 0.7410520315170288, "step": 5362 }, { "epoch": 6.580368098159509, "grad_norm": 0.23535165190696716, "learning_rate": 3.963121420199076e-05, "loss": 0.801235556602478, "step": 5363 }, { "epoch": 6.58159509202454, "grad_norm": 0.2756485044956207, "learning_rate": 3.962714410144638e-05, "loss": 0.7370539903640747, "step": 5364 }, { "epoch": 6.58282208588957, "grad_norm": 0.2477468103170395, "learning_rate": 3.962307341132353e-05, "loss": 0.8708747029304504, "step": 5365 }, { "epoch": 6.584049079754601, "grad_norm": 0.2825548052787781, "learning_rate": 3.9619002131786306e-05, "loss": 0.7925516963005066, "step": 5366 }, { "epoch": 6.585276073619632, "grad_norm": 0.24048899114131927, "learning_rate": 3.96149302629988e-05, "loss": 0.6435279846191406, "step": 5367 }, { "epoch": 6.586503067484663, "grad_norm": 0.2993827760219574, "learning_rate": 3.961085780512513e-05, "loss": 0.7509903907775879, "step": 5368 }, { "epoch": 6.587730061349693, "grad_norm": 0.2851082384586334, "learning_rate": 3.960678475832946e-05, "loss": 0.8033963441848755, "step": 5369 }, { "epoch": 6.588957055214724, "grad_norm": 0.2943301796913147, "learning_rate": 3.960271112277596e-05, "loss": 0.5457948446273804, "step": 5370 }, { "epoch": 6.590184049079754, "grad_norm": 0.31094464659690857, "learning_rate": 3.9598636898628817e-05, "loss": 0.8288716673851013, "step": 5371 }, { "epoch": 6.591411042944785, "grad_norm": 0.24647605419158936, "learning_rate": 3.959456208605226e-05, "loss": 0.7389910221099854, "step": 5372 }, { "epoch": 6.592638036809816, "grad_norm": 0.22983916103839874, "learning_rate": 3.959048668521052e-05, "loss": 0.7832624912261963, "step": 5373 }, { "epoch": 6.593865030674847, "grad_norm": 0.2782781720161438, "learning_rate": 3.958641069626789e-05, "loss": 0.6956085562705994, "step": 5374 }, { "epoch": 6.595092024539877, "grad_norm": 0.25455358624458313, "learning_rate": 3.9582334119388645e-05, "loss": 0.699530839920044, "step": 5375 }, { "epoch": 6.596319018404908, "grad_norm": 0.2514035701751709, "learning_rate": 3.957825695473709e-05, "loss": 0.6557011604309082, "step": 5376 }, { "epoch": 6.597546012269938, "grad_norm": 0.2787875235080719, "learning_rate": 3.957417920247759e-05, "loss": 0.6944245100021362, "step": 5377 }, { "epoch": 6.598773006134969, "grad_norm": 0.2567584216594696, "learning_rate": 3.957010086277448e-05, "loss": 0.7679136991500854, "step": 5378 }, { "epoch": 6.6, "grad_norm": 0.2528066039085388, "learning_rate": 3.956602193579217e-05, "loss": 0.7667626738548279, "step": 5379 }, { "epoch": 6.601226993865031, "grad_norm": 0.23848272860050201, "learning_rate": 3.956194242169506e-05, "loss": 0.7309097051620483, "step": 5380 }, { "epoch": 6.602453987730061, "grad_norm": 0.3203972578048706, "learning_rate": 3.9557862320647574e-05, "loss": 0.7543508410453796, "step": 5381 }, { "epoch": 6.603680981595092, "grad_norm": 0.21431048214435577, "learning_rate": 3.955378163281418e-05, "loss": 0.775003969669342, "step": 5382 }, { "epoch": 6.604907975460122, "grad_norm": 0.260863721370697, "learning_rate": 3.9549700358359364e-05, "loss": 0.6977576613426208, "step": 5383 }, { "epoch": 6.606134969325153, "grad_norm": 0.2659607529640198, "learning_rate": 3.954561849744761e-05, "loss": 0.6387976408004761, "step": 5384 }, { "epoch": 6.6073619631901845, "grad_norm": 0.3458225131034851, "learning_rate": 3.954153605024348e-05, "loss": 0.7584786415100098, "step": 5385 }, { "epoch": 6.608588957055215, "grad_norm": 0.34457167983055115, "learning_rate": 3.953745301691149e-05, "loss": 0.4799997806549072, "step": 5386 }, { "epoch": 6.609815950920245, "grad_norm": 0.2542145550251007, "learning_rate": 3.953336939761624e-05, "loss": 0.6950559020042419, "step": 5387 }, { "epoch": 6.611042944785276, "grad_norm": 0.2442850023508072, "learning_rate": 3.9529285192522315e-05, "loss": 0.8178507089614868, "step": 5388 }, { "epoch": 6.612269938650307, "grad_norm": 0.35579997301101685, "learning_rate": 3.952520040179434e-05, "loss": 0.4968506693840027, "step": 5389 }, { "epoch": 6.613496932515337, "grad_norm": 0.2567848861217499, "learning_rate": 3.952111502559697e-05, "loss": 0.8062777519226074, "step": 5390 }, { "epoch": 6.614723926380368, "grad_norm": 0.33519691228866577, "learning_rate": 3.951702906409487e-05, "loss": 0.4248184561729431, "step": 5391 }, { "epoch": 6.615950920245399, "grad_norm": 0.2574419677257538, "learning_rate": 3.951294251745272e-05, "loss": 0.7103166580200195, "step": 5392 }, { "epoch": 6.61717791411043, "grad_norm": 0.28876909613609314, "learning_rate": 3.950885538583526e-05, "loss": 0.537303626537323, "step": 5393 }, { "epoch": 6.61840490797546, "grad_norm": 0.23987719416618347, "learning_rate": 3.9504767669407216e-05, "loss": 0.6581161022186279, "step": 5394 }, { "epoch": 6.61963190184049, "grad_norm": 0.2677685618400574, "learning_rate": 3.9500679368333357e-05, "loss": 0.7392991185188293, "step": 5395 }, { "epoch": 6.620858895705521, "grad_norm": 0.3291289210319519, "learning_rate": 3.949659048277847e-05, "loss": 0.5999454259872437, "step": 5396 }, { "epoch": 6.6220858895705526, "grad_norm": 0.3112011253833771, "learning_rate": 3.949250101290737e-05, "loss": 0.732063889503479, "step": 5397 }, { "epoch": 6.623312883435583, "grad_norm": 0.18334834277629852, "learning_rate": 3.948841095888488e-05, "loss": 0.9637864828109741, "step": 5398 }, { "epoch": 6.624539877300613, "grad_norm": 0.24155007302761078, "learning_rate": 3.948432032087588e-05, "loss": 0.8599637746810913, "step": 5399 }, { "epoch": 6.625766871165644, "grad_norm": 0.27845415472984314, "learning_rate": 3.948022909904523e-05, "loss": 0.6196447610855103, "step": 5400 }, { "epoch": 6.626993865030675, "grad_norm": 0.29829472303390503, "learning_rate": 3.9476137293557836e-05, "loss": 0.5520572662353516, "step": 5401 }, { "epoch": 6.6282208588957054, "grad_norm": 0.2783106565475464, "learning_rate": 3.947204490457865e-05, "loss": 0.7876102924346924, "step": 5402 }, { "epoch": 6.629447852760737, "grad_norm": 0.19209600985050201, "learning_rate": 3.94679519322726e-05, "loss": 0.8482527732849121, "step": 5403 }, { "epoch": 6.630674846625767, "grad_norm": 0.28003278374671936, "learning_rate": 3.946385837680467e-05, "loss": 0.7328038215637207, "step": 5404 }, { "epoch": 6.631901840490798, "grad_norm": 0.2369908094406128, "learning_rate": 3.9459764238339865e-05, "loss": 0.8532731533050537, "step": 5405 }, { "epoch": 6.633128834355828, "grad_norm": 0.2693164050579071, "learning_rate": 3.9455669517043206e-05, "loss": 0.8178693652153015, "step": 5406 }, { "epoch": 6.634355828220859, "grad_norm": 0.2927023470401764, "learning_rate": 3.945157421307973e-05, "loss": 0.6712958812713623, "step": 5407 }, { "epoch": 6.6355828220858895, "grad_norm": 0.22934181988239288, "learning_rate": 3.944747832661452e-05, "loss": 0.7658447623252869, "step": 5408 }, { "epoch": 6.636809815950921, "grad_norm": 0.26085320115089417, "learning_rate": 3.9443381857812665e-05, "loss": 0.6999931335449219, "step": 5409 }, { "epoch": 6.638036809815951, "grad_norm": 0.22198043763637543, "learning_rate": 3.9439284806839286e-05, "loss": 0.7670865654945374, "step": 5410 }, { "epoch": 6.639263803680982, "grad_norm": 0.31418195366859436, "learning_rate": 3.943518717385951e-05, "loss": 0.5220214128494263, "step": 5411 }, { "epoch": 6.640490797546012, "grad_norm": 0.29592108726501465, "learning_rate": 3.943108895903851e-05, "loss": 0.6608266234397888, "step": 5412 }, { "epoch": 6.641717791411043, "grad_norm": 0.2819206416606903, "learning_rate": 3.9426990162541475e-05, "loss": 0.7170333862304688, "step": 5413 }, { "epoch": 6.6429447852760735, "grad_norm": 0.28314098715782166, "learning_rate": 3.9422890784533616e-05, "loss": 0.8091933727264404, "step": 5414 }, { "epoch": 6.644171779141105, "grad_norm": 0.2911204397678375, "learning_rate": 3.9418790825180163e-05, "loss": 0.6574269533157349, "step": 5415 }, { "epoch": 6.645398773006135, "grad_norm": 0.26210838556289673, "learning_rate": 3.941469028464637e-05, "loss": 0.6327621936798096, "step": 5416 }, { "epoch": 6.646625766871166, "grad_norm": 0.2579224109649658, "learning_rate": 3.941058916309753e-05, "loss": 0.5659319162368774, "step": 5417 }, { "epoch": 6.647852760736196, "grad_norm": 0.3213135004043579, "learning_rate": 3.940648746069895e-05, "loss": 0.5760035514831543, "step": 5418 }, { "epoch": 6.649079754601227, "grad_norm": 0.2460862249135971, "learning_rate": 3.940238517761593e-05, "loss": 0.8463797569274902, "step": 5419 }, { "epoch": 6.6503067484662575, "grad_norm": 0.2529328763484955, "learning_rate": 3.939828231401385e-05, "loss": 0.6344667673110962, "step": 5420 }, { "epoch": 6.651533742331289, "grad_norm": 0.23670461773872375, "learning_rate": 3.939417887005808e-05, "loss": 0.6531614065170288, "step": 5421 }, { "epoch": 6.652760736196319, "grad_norm": 0.3089958727359772, "learning_rate": 3.9390074845914006e-05, "loss": 0.7278796434402466, "step": 5422 }, { "epoch": 6.65398773006135, "grad_norm": 0.22811417281627655, "learning_rate": 3.938597024174706e-05, "loss": 0.8631652593612671, "step": 5423 }, { "epoch": 6.65521472392638, "grad_norm": 0.2375156283378601, "learning_rate": 3.938186505772268e-05, "loss": 0.9006096124649048, "step": 5424 }, { "epoch": 6.656441717791411, "grad_norm": 0.32185640931129456, "learning_rate": 3.937775929400634e-05, "loss": 0.5719070434570312, "step": 5425 }, { "epoch": 6.6576687116564415, "grad_norm": 0.259054034948349, "learning_rate": 3.937365295076353e-05, "loss": 0.6506443023681641, "step": 5426 }, { "epoch": 6.658895705521473, "grad_norm": 0.23607498407363892, "learning_rate": 3.936954602815976e-05, "loss": 0.7032852172851562, "step": 5427 }, { "epoch": 6.660122699386503, "grad_norm": 0.287910521030426, "learning_rate": 3.9365438526360576e-05, "loss": 0.6831657290458679, "step": 5428 }, { "epoch": 6.661349693251534, "grad_norm": 0.28494590520858765, "learning_rate": 3.936133044553154e-05, "loss": 0.6098678112030029, "step": 5429 }, { "epoch": 6.662576687116564, "grad_norm": 0.2767737805843353, "learning_rate": 3.9357221785838226e-05, "loss": 0.7256125807762146, "step": 5430 }, { "epoch": 6.663803680981595, "grad_norm": 0.2840648293495178, "learning_rate": 3.935311254744626e-05, "loss": 0.7511664628982544, "step": 5431 }, { "epoch": 6.6650306748466255, "grad_norm": 0.27875494956970215, "learning_rate": 3.934900273052126e-05, "loss": 0.9077833890914917, "step": 5432 }, { "epoch": 6.666257668711657, "grad_norm": 0.26748180389404297, "learning_rate": 3.9344892335228876e-05, "loss": 0.629853367805481, "step": 5433 }, { "epoch": 6.667484662576687, "grad_norm": 0.23867003619670868, "learning_rate": 3.934078136173481e-05, "loss": 0.7295451164245605, "step": 5434 }, { "epoch": 6.668711656441718, "grad_norm": 0.2335153967142105, "learning_rate": 3.933666981020474e-05, "loss": 0.7129398584365845, "step": 5435 }, { "epoch": 6.669938650306748, "grad_norm": 0.27602824568748474, "learning_rate": 3.933255768080439e-05, "loss": 0.7040311098098755, "step": 5436 }, { "epoch": 6.671165644171779, "grad_norm": 0.2285580188035965, "learning_rate": 3.9328444973699536e-05, "loss": 0.7072001695632935, "step": 5437 }, { "epoch": 6.6723926380368095, "grad_norm": 0.25681978464126587, "learning_rate": 3.932433168905591e-05, "loss": 0.7358304262161255, "step": 5438 }, { "epoch": 6.673619631901841, "grad_norm": 0.26061317324638367, "learning_rate": 3.9320217827039354e-05, "loss": 0.6384345293045044, "step": 5439 }, { "epoch": 6.674846625766871, "grad_norm": 0.26170098781585693, "learning_rate": 3.931610338781564e-05, "loss": 0.7322404384613037, "step": 5440 }, { "epoch": 6.676073619631902, "grad_norm": 0.2383810579776764, "learning_rate": 3.9311988371550636e-05, "loss": 0.6054210662841797, "step": 5441 }, { "epoch": 6.677300613496932, "grad_norm": 0.30093517899513245, "learning_rate": 3.930787277841021e-05, "loss": 0.7276414632797241, "step": 5442 }, { "epoch": 6.678527607361963, "grad_norm": 0.2855135500431061, "learning_rate": 3.9303756608560214e-05, "loss": 0.7687692046165466, "step": 5443 }, { "epoch": 6.6797546012269935, "grad_norm": 0.2741541564464569, "learning_rate": 3.9299639862166606e-05, "loss": 0.7383292317390442, "step": 5444 }, { "epoch": 6.680981595092025, "grad_norm": 0.26329174637794495, "learning_rate": 3.9295522539395294e-05, "loss": 0.7627216577529907, "step": 5445 }, { "epoch": 6.682208588957055, "grad_norm": 0.3017370104789734, "learning_rate": 3.9291404640412235e-05, "loss": 0.4908004403114319, "step": 5446 }, { "epoch": 6.683435582822086, "grad_norm": 0.24260544776916504, "learning_rate": 3.928728616538342e-05, "loss": 0.7534165382385254, "step": 5447 }, { "epoch": 6.684662576687117, "grad_norm": 0.23889155685901642, "learning_rate": 3.928316711447485e-05, "loss": 0.8776600956916809, "step": 5448 }, { "epoch": 6.685889570552147, "grad_norm": 0.2680078148841858, "learning_rate": 3.9279047487852546e-05, "loss": 0.7835155725479126, "step": 5449 }, { "epoch": 6.6871165644171775, "grad_norm": 0.28164345026016235, "learning_rate": 3.927492728568256e-05, "loss": 0.7371788620948792, "step": 5450 }, { "epoch": 6.688343558282209, "grad_norm": 0.22050674259662628, "learning_rate": 3.927080650813098e-05, "loss": 0.7812517881393433, "step": 5451 }, { "epoch": 6.68957055214724, "grad_norm": 0.25773605704307556, "learning_rate": 3.926668515536389e-05, "loss": 0.8144861459732056, "step": 5452 }, { "epoch": 6.69079754601227, "grad_norm": 0.27395716309547424, "learning_rate": 3.92625632275474e-05, "loss": 0.6777904629707336, "step": 5453 }, { "epoch": 6.6920245398773, "grad_norm": 0.2583969235420227, "learning_rate": 3.9258440724847665e-05, "loss": 0.5873717665672302, "step": 5454 }, { "epoch": 6.693251533742331, "grad_norm": 0.2669885754585266, "learning_rate": 3.925431764743086e-05, "loss": 0.7977827191352844, "step": 5455 }, { "epoch": 6.694478527607362, "grad_norm": 0.272457480430603, "learning_rate": 3.9250193995463165e-05, "loss": 0.6236673593521118, "step": 5456 }, { "epoch": 6.695705521472393, "grad_norm": 0.22789359092712402, "learning_rate": 3.9246069769110794e-05, "loss": 0.9325246810913086, "step": 5457 }, { "epoch": 6.696932515337423, "grad_norm": 0.19323508441448212, "learning_rate": 3.924194496853997e-05, "loss": 0.7178403735160828, "step": 5458 }, { "epoch": 6.698159509202454, "grad_norm": 0.2789275646209717, "learning_rate": 3.923781959391697e-05, "loss": 0.7887891530990601, "step": 5459 }, { "epoch": 6.699386503067485, "grad_norm": 0.21143734455108643, "learning_rate": 3.923369364540807e-05, "loss": 0.7901533842086792, "step": 5460 }, { "epoch": 6.700613496932515, "grad_norm": 0.21492567658424377, "learning_rate": 3.922956712317958e-05, "loss": 0.7760301828384399, "step": 5461 }, { "epoch": 6.7018404907975455, "grad_norm": 0.2978813052177429, "learning_rate": 3.922544002739781e-05, "loss": 0.7277374267578125, "step": 5462 }, { "epoch": 6.703067484662577, "grad_norm": 0.22349046170711517, "learning_rate": 3.922131235822914e-05, "loss": 0.7902638912200928, "step": 5463 }, { "epoch": 6.704294478527608, "grad_norm": 0.3152698874473572, "learning_rate": 3.9217184115839925e-05, "loss": 0.6755245923995972, "step": 5464 }, { "epoch": 6.705521472392638, "grad_norm": 0.26784393191337585, "learning_rate": 3.9213055300396565e-05, "loss": 0.5766227841377258, "step": 5465 }, { "epoch": 6.706748466257669, "grad_norm": 0.30898627638816833, "learning_rate": 3.920892591206547e-05, "loss": 0.7748966217041016, "step": 5466 }, { "epoch": 6.707975460122699, "grad_norm": 0.28509142994880676, "learning_rate": 3.920479595101312e-05, "loss": 0.6528287529945374, "step": 5467 }, { "epoch": 6.70920245398773, "grad_norm": 0.32421189546585083, "learning_rate": 3.920066541740595e-05, "loss": 0.7016320824623108, "step": 5468 }, { "epoch": 6.710429447852761, "grad_norm": 0.2488919198513031, "learning_rate": 3.9196534311410446e-05, "loss": 0.6687291860580444, "step": 5469 }, { "epoch": 6.711656441717792, "grad_norm": 0.295244038105011, "learning_rate": 3.919240263319315e-05, "loss": 0.7174102067947388, "step": 5470 }, { "epoch": 6.712883435582822, "grad_norm": 0.24836571514606476, "learning_rate": 3.918827038292057e-05, "loss": 0.7455564737319946, "step": 5471 }, { "epoch": 6.714110429447853, "grad_norm": 0.24233528971672058, "learning_rate": 3.918413756075928e-05, "loss": 0.8076599836349487, "step": 5472 }, { "epoch": 6.715337423312883, "grad_norm": 0.28230854868888855, "learning_rate": 3.9180004166875864e-05, "loss": 0.6749839782714844, "step": 5473 }, { "epoch": 6.716564417177914, "grad_norm": 0.2519897222518921, "learning_rate": 3.917587020143693e-05, "loss": 0.7600489854812622, "step": 5474 }, { "epoch": 6.717791411042945, "grad_norm": 0.23183251917362213, "learning_rate": 3.917173566460908e-05, "loss": 0.6593212485313416, "step": 5475 }, { "epoch": 6.719018404907976, "grad_norm": 0.24858585000038147, "learning_rate": 3.9167600556559e-05, "loss": 0.7581737041473389, "step": 5476 }, { "epoch": 6.720245398773006, "grad_norm": 0.23928487300872803, "learning_rate": 3.9163464877453336e-05, "loss": 0.7057120203971863, "step": 5477 }, { "epoch": 6.721472392638037, "grad_norm": 0.2221064418554306, "learning_rate": 3.915932862745881e-05, "loss": 0.7999484539031982, "step": 5478 }, { "epoch": 6.722699386503067, "grad_norm": 0.35412222146987915, "learning_rate": 3.915519180674213e-05, "loss": 0.532484769821167, "step": 5479 }, { "epoch": 6.723926380368098, "grad_norm": 0.30301088094711304, "learning_rate": 3.915105441547003e-05, "loss": 0.6001980304718018, "step": 5480 }, { "epoch": 6.725153374233129, "grad_norm": 0.2410973757505417, "learning_rate": 3.914691645380929e-05, "loss": 0.6826668381690979, "step": 5481 }, { "epoch": 6.72638036809816, "grad_norm": 0.28351929783821106, "learning_rate": 3.91427779219267e-05, "loss": 0.7340754270553589, "step": 5482 }, { "epoch": 6.72760736196319, "grad_norm": 0.2796561121940613, "learning_rate": 3.913863881998907e-05, "loss": 0.6895279884338379, "step": 5483 }, { "epoch": 6.728834355828221, "grad_norm": 0.2478562444448471, "learning_rate": 3.913449914816323e-05, "loss": 0.8035140037536621, "step": 5484 }, { "epoch": 6.730061349693251, "grad_norm": 0.24758419394493103, "learning_rate": 3.9130358906616036e-05, "loss": 0.8537224531173706, "step": 5485 }, { "epoch": 6.731288343558282, "grad_norm": 0.2109624743461609, "learning_rate": 3.912621809551439e-05, "loss": 0.8047735691070557, "step": 5486 }, { "epoch": 6.732515337423313, "grad_norm": 0.2712368369102478, "learning_rate": 3.912207671502518e-05, "loss": 0.6425692439079285, "step": 5487 }, { "epoch": 6.733742331288344, "grad_norm": 0.3058558702468872, "learning_rate": 3.911793476531532e-05, "loss": 0.7267236709594727, "step": 5488 }, { "epoch": 6.734969325153374, "grad_norm": 0.20487326383590698, "learning_rate": 3.911379224655179e-05, "loss": 0.7824127078056335, "step": 5489 }, { "epoch": 6.736196319018405, "grad_norm": 0.22328898310661316, "learning_rate": 3.910964915890154e-05, "loss": 0.7225578427314758, "step": 5490 }, { "epoch": 6.737423312883435, "grad_norm": 0.251598984003067, "learning_rate": 3.9105505502531584e-05, "loss": 0.8117345571517944, "step": 5491 }, { "epoch": 6.738650306748466, "grad_norm": 0.3050127625465393, "learning_rate": 3.910136127760892e-05, "loss": 0.6170384287834167, "step": 5492 }, { "epoch": 6.739877300613497, "grad_norm": 0.3640877306461334, "learning_rate": 3.90972164843006e-05, "loss": 0.6897494792938232, "step": 5493 }, { "epoch": 6.741104294478528, "grad_norm": 0.28080540895462036, "learning_rate": 3.90930711227737e-05, "loss": 0.6991904377937317, "step": 5494 }, { "epoch": 6.742331288343558, "grad_norm": 0.2272559553384781, "learning_rate": 3.908892519319529e-05, "loss": 0.7404782772064209, "step": 5495 }, { "epoch": 6.743558282208589, "grad_norm": 0.26906222105026245, "learning_rate": 3.9084778695732496e-05, "loss": 0.5185920000076294, "step": 5496 }, { "epoch": 6.744785276073619, "grad_norm": 0.2665277421474457, "learning_rate": 3.908063163055244e-05, "loss": 0.7411189079284668, "step": 5497 }, { "epoch": 6.74601226993865, "grad_norm": 0.2568955719470978, "learning_rate": 3.9076483997822286e-05, "loss": 0.9612547159194946, "step": 5498 }, { "epoch": 6.747239263803681, "grad_norm": 0.2317093312740326, "learning_rate": 3.90723357977092e-05, "loss": 0.539323091506958, "step": 5499 }, { "epoch": 6.748466257668712, "grad_norm": 0.28841838240623474, "learning_rate": 3.90681870303804e-05, "loss": 0.7601889371871948, "step": 5500 }, { "epoch": 6.749693251533742, "grad_norm": 0.2673278748989105, "learning_rate": 3.906403769600311e-05, "loss": 0.7905702590942383, "step": 5501 }, { "epoch": 6.750920245398773, "grad_norm": 0.27942603826522827, "learning_rate": 3.9059887794744566e-05, "loss": 0.8320192098617554, "step": 5502 }, { "epoch": 6.752147239263803, "grad_norm": 0.23011846840381622, "learning_rate": 3.9055737326772044e-05, "loss": 0.7289665937423706, "step": 5503 }, { "epoch": 6.7533742331288344, "grad_norm": 0.214580699801445, "learning_rate": 3.9051586292252834e-05, "loss": 0.9001915454864502, "step": 5504 }, { "epoch": 6.754601226993865, "grad_norm": 0.26505449414253235, "learning_rate": 3.904743469135426e-05, "loss": 0.6685622334480286, "step": 5505 }, { "epoch": 6.755828220858896, "grad_norm": 0.29338380694389343, "learning_rate": 3.904328252424366e-05, "loss": 0.7891194820404053, "step": 5506 }, { "epoch": 6.757055214723926, "grad_norm": 0.27981454133987427, "learning_rate": 3.9039129791088396e-05, "loss": 0.6131219863891602, "step": 5507 }, { "epoch": 6.758282208588957, "grad_norm": 0.27657726407051086, "learning_rate": 3.9034976492055855e-05, "loss": 0.5764015316963196, "step": 5508 }, { "epoch": 6.759509202453987, "grad_norm": 0.281040221452713, "learning_rate": 3.9030822627313433e-05, "loss": 0.6800937652587891, "step": 5509 }, { "epoch": 6.7607361963190185, "grad_norm": 0.29241323471069336, "learning_rate": 3.9026668197028574e-05, "loss": 0.7093517780303955, "step": 5510 }, { "epoch": 6.76196319018405, "grad_norm": 0.28761178255081177, "learning_rate": 3.902251320136872e-05, "loss": 0.5794106721878052, "step": 5511 }, { "epoch": 6.76319018404908, "grad_norm": 0.23256513476371765, "learning_rate": 3.901835764050135e-05, "loss": 0.8231546878814697, "step": 5512 }, { "epoch": 6.76441717791411, "grad_norm": 0.29640600085258484, "learning_rate": 3.9014201514593975e-05, "loss": 0.6622867584228516, "step": 5513 }, { "epoch": 6.765644171779141, "grad_norm": 0.2055651843547821, "learning_rate": 3.9010044823814095e-05, "loss": 0.8591861724853516, "step": 5514 }, { "epoch": 6.766871165644172, "grad_norm": 0.2581001818180084, "learning_rate": 3.9005887568329275e-05, "loss": 0.5993175506591797, "step": 5515 }, { "epoch": 6.7680981595092025, "grad_norm": 0.2932116389274597, "learning_rate": 3.900172974830707e-05, "loss": 0.6343483328819275, "step": 5516 }, { "epoch": 6.769325153374233, "grad_norm": 0.22760184109210968, "learning_rate": 3.899757136391507e-05, "loss": 0.8447046279907227, "step": 5517 }, { "epoch": 6.770552147239264, "grad_norm": 0.22289429605007172, "learning_rate": 3.899341241532089e-05, "loss": 0.895439624786377, "step": 5518 }, { "epoch": 6.771779141104295, "grad_norm": 0.24933500587940216, "learning_rate": 3.898925290269217e-05, "loss": 0.9081094264984131, "step": 5519 }, { "epoch": 6.773006134969325, "grad_norm": 0.23516471683979034, "learning_rate": 3.8985092826196566e-05, "loss": 0.6696373224258423, "step": 5520 }, { "epoch": 6.774233128834355, "grad_norm": 0.324919730424881, "learning_rate": 3.898093218600176e-05, "loss": 0.7151868343353271, "step": 5521 }, { "epoch": 6.7754601226993865, "grad_norm": 0.3281179368495941, "learning_rate": 3.8976770982275446e-05, "loss": 0.7127185463905334, "step": 5522 }, { "epoch": 6.776687116564418, "grad_norm": 0.31696364283561707, "learning_rate": 3.897260921518535e-05, "loss": 0.5580114126205444, "step": 5523 }, { "epoch": 6.777914110429448, "grad_norm": 0.25636452436447144, "learning_rate": 3.896844688489924e-05, "loss": 0.8214384317398071, "step": 5524 }, { "epoch": 6.779141104294479, "grad_norm": 0.2215525358915329, "learning_rate": 3.896428399158487e-05, "loss": 0.9243420362472534, "step": 5525 }, { "epoch": 6.780368098159509, "grad_norm": 0.22378607094287872, "learning_rate": 3.896012053541004e-05, "loss": 0.6745320558547974, "step": 5526 }, { "epoch": 6.78159509202454, "grad_norm": 0.24528205394744873, "learning_rate": 3.895595651654257e-05, "loss": 0.7973473072052002, "step": 5527 }, { "epoch": 6.7828220858895705, "grad_norm": 0.29073643684387207, "learning_rate": 3.89517919351503e-05, "loss": 0.704216480255127, "step": 5528 }, { "epoch": 6.784049079754602, "grad_norm": 0.22372876107692719, "learning_rate": 3.8947626791401074e-05, "loss": 0.7895127534866333, "step": 5529 }, { "epoch": 6.785276073619632, "grad_norm": 0.2420777827501297, "learning_rate": 3.89434610854628e-05, "loss": 0.8014940023422241, "step": 5530 }, { "epoch": 6.786503067484663, "grad_norm": 0.28247925639152527, "learning_rate": 3.893929481750338e-05, "loss": 0.6947696805000305, "step": 5531 }, { "epoch": 6.787730061349693, "grad_norm": 0.2418750822544098, "learning_rate": 3.893512798769074e-05, "loss": 0.7035265564918518, "step": 5532 }, { "epoch": 6.788957055214724, "grad_norm": 0.3717968165874481, "learning_rate": 3.8930960596192835e-05, "loss": 0.5933363437652588, "step": 5533 }, { "epoch": 6.7901840490797545, "grad_norm": 0.3216930031776428, "learning_rate": 3.892679264317764e-05, "loss": 0.5551850199699402, "step": 5534 }, { "epoch": 6.791411042944786, "grad_norm": 0.287868857383728, "learning_rate": 3.892262412881316e-05, "loss": 0.773733377456665, "step": 5535 }, { "epoch": 6.792638036809816, "grad_norm": 0.2638390362262726, "learning_rate": 3.89184550532674e-05, "loss": 0.7489129900932312, "step": 5536 }, { "epoch": 6.793865030674847, "grad_norm": 0.2446446716785431, "learning_rate": 3.8914285416708416e-05, "loss": 0.8405195474624634, "step": 5537 }, { "epoch": 6.795092024539877, "grad_norm": 0.24413815140724182, "learning_rate": 3.891011521930428e-05, "loss": 0.6548327207565308, "step": 5538 }, { "epoch": 6.796319018404908, "grad_norm": 0.2576320171356201, "learning_rate": 3.8905944461223063e-05, "loss": 0.7986465692520142, "step": 5539 }, { "epoch": 6.7975460122699385, "grad_norm": 0.922431468963623, "learning_rate": 3.890177314263289e-05, "loss": 0.6541160345077515, "step": 5540 }, { "epoch": 6.79877300613497, "grad_norm": 0.33896785974502563, "learning_rate": 3.889760126370189e-05, "loss": 0.6751694679260254, "step": 5541 }, { "epoch": 6.8, "grad_norm": 0.27691516280174255, "learning_rate": 3.889342882459822e-05, "loss": 0.7086690664291382, "step": 5542 }, { "epoch": 6.801226993865031, "grad_norm": 0.30037909746170044, "learning_rate": 3.888925582549006e-05, "loss": 0.5349816679954529, "step": 5543 }, { "epoch": 6.802453987730061, "grad_norm": 0.25055018067359924, "learning_rate": 3.888508226654561e-05, "loss": 0.6875405311584473, "step": 5544 }, { "epoch": 6.803680981595092, "grad_norm": 0.6550738215446472, "learning_rate": 3.8880908147933096e-05, "loss": 0.7440522909164429, "step": 5545 }, { "epoch": 6.8049079754601225, "grad_norm": 0.3566477298736572, "learning_rate": 3.887673346982076e-05, "loss": 0.5483524799346924, "step": 5546 }, { "epoch": 6.806134969325154, "grad_norm": 0.36570265889167786, "learning_rate": 3.8872558232376885e-05, "loss": 0.6865929365158081, "step": 5547 }, { "epoch": 6.807361963190184, "grad_norm": 0.29184988141059875, "learning_rate": 3.8868382435769747e-05, "loss": 0.7441672086715698, "step": 5548 }, { "epoch": 6.808588957055215, "grad_norm": 0.3164273798465729, "learning_rate": 3.8864206080167666e-05, "loss": 0.7901269197463989, "step": 5549 }, { "epoch": 6.809815950920245, "grad_norm": 0.24808944761753082, "learning_rate": 3.886002916573898e-05, "loss": 0.7192596793174744, "step": 5550 }, { "epoch": 6.811042944785276, "grad_norm": 0.31959205865859985, "learning_rate": 3.885585169265205e-05, "loss": 0.7989943623542786, "step": 5551 }, { "epoch": 6.8122699386503065, "grad_norm": 0.26556655764579773, "learning_rate": 3.885167366107526e-05, "loss": 0.7833027839660645, "step": 5552 }, { "epoch": 6.813496932515338, "grad_norm": 0.32007575035095215, "learning_rate": 3.8847495071177e-05, "loss": 0.5351754426956177, "step": 5553 }, { "epoch": 6.814723926380368, "grad_norm": 0.3056017756462097, "learning_rate": 3.884331592312572e-05, "loss": 0.7099443674087524, "step": 5554 }, { "epoch": 6.815950920245399, "grad_norm": 0.25259387493133545, "learning_rate": 3.883913621708985e-05, "loss": 0.8225876092910767, "step": 5555 }, { "epoch": 6.817177914110429, "grad_norm": 0.29973599314689636, "learning_rate": 3.8834955953237875e-05, "loss": 0.67396080493927, "step": 5556 }, { "epoch": 6.81840490797546, "grad_norm": 0.3010457456111908, "learning_rate": 3.883077513173828e-05, "loss": 0.6295837163925171, "step": 5557 }, { "epoch": 6.8196319018404905, "grad_norm": 0.25171536207199097, "learning_rate": 3.882659375275959e-05, "loss": 0.7218742966651917, "step": 5558 }, { "epoch": 6.820858895705522, "grad_norm": 0.23707255721092224, "learning_rate": 3.882241181647034e-05, "loss": 0.8966013789176941, "step": 5559 }, { "epoch": 6.822085889570552, "grad_norm": 0.2171134054660797, "learning_rate": 3.881822932303909e-05, "loss": 0.8301767110824585, "step": 5560 }, { "epoch": 6.823312883435583, "grad_norm": 0.2862348258495331, "learning_rate": 3.8814046272634424e-05, "loss": 0.6644917726516724, "step": 5561 }, { "epoch": 6.824539877300613, "grad_norm": 0.26554539799690247, "learning_rate": 3.8809862665424955e-05, "loss": 0.6554297208786011, "step": 5562 }, { "epoch": 6.825766871165644, "grad_norm": 0.24606767296791077, "learning_rate": 3.8805678501579315e-05, "loss": 0.70333331823349, "step": 5563 }, { "epoch": 6.8269938650306745, "grad_norm": 0.23907800018787384, "learning_rate": 3.880149378126614e-05, "loss": 0.643448531627655, "step": 5564 }, { "epoch": 6.828220858895706, "grad_norm": 0.2531842291355133, "learning_rate": 3.8797308504654116e-05, "loss": 0.7920911312103271, "step": 5565 }, { "epoch": 6.829447852760736, "grad_norm": 0.3034652769565582, "learning_rate": 3.879312267191194e-05, "loss": 0.8773581385612488, "step": 5566 }, { "epoch": 6.830674846625767, "grad_norm": 0.23662632703781128, "learning_rate": 3.878893628320833e-05, "loss": 0.7161486744880676, "step": 5567 }, { "epoch": 6.831901840490797, "grad_norm": 0.24259690940380096, "learning_rate": 3.8784749338712025e-05, "loss": 0.7972775101661682, "step": 5568 }, { "epoch": 6.833128834355828, "grad_norm": 0.2730180621147156, "learning_rate": 3.8780561838591775e-05, "loss": 0.7945282459259033, "step": 5569 }, { "epoch": 6.8343558282208585, "grad_norm": 0.2628921866416931, "learning_rate": 3.8776373783016396e-05, "loss": 0.856953501701355, "step": 5570 }, { "epoch": 6.83558282208589, "grad_norm": 0.28337186574935913, "learning_rate": 3.8772185172154676e-05, "loss": 0.7297005653381348, "step": 5571 }, { "epoch": 6.83680981595092, "grad_norm": 0.31758230924606323, "learning_rate": 3.8767996006175454e-05, "loss": 0.5280922651290894, "step": 5572 }, { "epoch": 6.838036809815951, "grad_norm": 0.26729264855384827, "learning_rate": 3.876380628524758e-05, "loss": 0.5332252383232117, "step": 5573 }, { "epoch": 6.839263803680982, "grad_norm": 0.23612014949321747, "learning_rate": 3.875961600953993e-05, "loss": 0.7594619393348694, "step": 5574 }, { "epoch": 6.840490797546012, "grad_norm": 0.21449649333953857, "learning_rate": 3.8755425179221404e-05, "loss": 0.8781822919845581, "step": 5575 }, { "epoch": 6.8417177914110425, "grad_norm": 0.21718928217887878, "learning_rate": 3.875123379446092e-05, "loss": 0.7190046310424805, "step": 5576 }, { "epoch": 6.842944785276074, "grad_norm": 0.2688877284526825, "learning_rate": 3.8747041855427415e-05, "loss": 0.8068972826004028, "step": 5577 }, { "epoch": 6.844171779141105, "grad_norm": 0.2473432719707489, "learning_rate": 3.874284936228987e-05, "loss": 0.7890157699584961, "step": 5578 }, { "epoch": 6.845398773006135, "grad_norm": 0.29953500628471375, "learning_rate": 3.873865631521726e-05, "loss": 0.830997109413147, "step": 5579 }, { "epoch": 6.846625766871165, "grad_norm": 0.28659045696258545, "learning_rate": 3.87344627143786e-05, "loss": 0.5926474332809448, "step": 5580 }, { "epoch": 6.847852760736196, "grad_norm": 0.29487144947052, "learning_rate": 3.873026855994292e-05, "loss": 0.7042441368103027, "step": 5581 }, { "epoch": 6.849079754601227, "grad_norm": 0.261185884475708, "learning_rate": 3.8726073852079266e-05, "loss": 0.8201658129692078, "step": 5582 }, { "epoch": 6.850306748466258, "grad_norm": 0.3032943606376648, "learning_rate": 3.8721878590956726e-05, "loss": 0.48496872186660767, "step": 5583 }, { "epoch": 6.851533742331288, "grad_norm": 0.4271252751350403, "learning_rate": 3.87176827767444e-05, "loss": 0.7115709185600281, "step": 5584 }, { "epoch": 6.852760736196319, "grad_norm": 0.32284462451934814, "learning_rate": 3.87134864096114e-05, "loss": 0.5830533504486084, "step": 5585 }, { "epoch": 6.85398773006135, "grad_norm": 0.2513439655303955, "learning_rate": 3.870928948972688e-05, "loss": 0.9710230827331543, "step": 5586 }, { "epoch": 6.85521472392638, "grad_norm": 0.22457951307296753, "learning_rate": 3.870509201726e-05, "loss": 0.7770220637321472, "step": 5587 }, { "epoch": 6.856441717791411, "grad_norm": 0.24774792790412903, "learning_rate": 3.870089399237995e-05, "loss": 0.8330021500587463, "step": 5588 }, { "epoch": 6.857668711656442, "grad_norm": 0.25766491889953613, "learning_rate": 3.8696695415255935e-05, "loss": 0.7317240238189697, "step": 5589 }, { "epoch": 6.858895705521473, "grad_norm": 0.23615412414073944, "learning_rate": 3.8692496286057194e-05, "loss": 0.6416501402854919, "step": 5590 }, { "epoch": 6.860122699386503, "grad_norm": 0.27788272500038147, "learning_rate": 3.868829660495299e-05, "loss": 0.6235580444335938, "step": 5591 }, { "epoch": 6.861349693251534, "grad_norm": 0.28847363591194153, "learning_rate": 3.868409637211257e-05, "loss": 0.7350674271583557, "step": 5592 }, { "epoch": 6.862576687116564, "grad_norm": 0.21878699958324432, "learning_rate": 3.867989558770527e-05, "loss": 0.6574127674102783, "step": 5593 }, { "epoch": 6.863803680981595, "grad_norm": 0.2730858325958252, "learning_rate": 3.8675694251900385e-05, "loss": 0.7714989185333252, "step": 5594 }, { "epoch": 6.865030674846626, "grad_norm": 0.26979178190231323, "learning_rate": 3.867149236486727e-05, "loss": 0.8028494119644165, "step": 5595 }, { "epoch": 6.866257668711657, "grad_norm": 0.2750879228115082, "learning_rate": 3.86672899267753e-05, "loss": 0.6530344486236572, "step": 5596 }, { "epoch": 6.867484662576687, "grad_norm": 0.25040769577026367, "learning_rate": 3.866308693779385e-05, "loss": 0.8401331901550293, "step": 5597 }, { "epoch": 6.868711656441718, "grad_norm": 0.2427898794412613, "learning_rate": 3.865888339809233e-05, "loss": 0.7268176078796387, "step": 5598 }, { "epoch": 6.869938650306748, "grad_norm": 0.288212388753891, "learning_rate": 3.865467930784017e-05, "loss": 0.7879534959793091, "step": 5599 }, { "epoch": 6.871165644171779, "grad_norm": 0.2907659113407135, "learning_rate": 3.865047466720684e-05, "loss": 0.6540526151657104, "step": 5600 }, { "epoch": 6.87239263803681, "grad_norm": 0.20827338099479675, "learning_rate": 3.864626947636181e-05, "loss": 0.7377516627311707, "step": 5601 }, { "epoch": 6.873619631901841, "grad_norm": 0.3382343351840973, "learning_rate": 3.8642063735474574e-05, "loss": 0.50056391954422, "step": 5602 }, { "epoch": 6.874846625766871, "grad_norm": 0.23068200051784515, "learning_rate": 3.863785744471465e-05, "loss": 0.7208155393600464, "step": 5603 }, { "epoch": 6.876073619631902, "grad_norm": 0.29561901092529297, "learning_rate": 3.8633650604251606e-05, "loss": 0.6869710683822632, "step": 5604 }, { "epoch": 6.877300613496932, "grad_norm": 0.2712336480617523, "learning_rate": 3.8629443214254976e-05, "loss": 0.8668217658996582, "step": 5605 }, { "epoch": 6.8785276073619634, "grad_norm": 0.2791215479373932, "learning_rate": 3.8625235274894365e-05, "loss": 0.6786916255950928, "step": 5606 }, { "epoch": 6.879754601226994, "grad_norm": 0.305839478969574, "learning_rate": 3.862102678633939e-05, "loss": 0.5668191313743591, "step": 5607 }, { "epoch": 6.880981595092025, "grad_norm": 0.21671606600284576, "learning_rate": 3.861681774875966e-05, "loss": 0.7678540945053101, "step": 5608 }, { "epoch": 6.882208588957055, "grad_norm": 0.30156636238098145, "learning_rate": 3.861260816232485e-05, "loss": 0.796812117099762, "step": 5609 }, { "epoch": 6.883435582822086, "grad_norm": 2.247978925704956, "learning_rate": 3.860839802720463e-05, "loss": 0.7108560800552368, "step": 5610 }, { "epoch": 6.884662576687116, "grad_norm": 0.2539139986038208, "learning_rate": 3.8604187343568696e-05, "loss": 0.8898415565490723, "step": 5611 }, { "epoch": 6.8858895705521475, "grad_norm": 0.2611912488937378, "learning_rate": 3.8599976111586776e-05, "loss": 0.7031702995300293, "step": 5612 }, { "epoch": 6.887116564417178, "grad_norm": 0.2416868656873703, "learning_rate": 3.8595764331428606e-05, "loss": 0.9500126838684082, "step": 5613 }, { "epoch": 6.888343558282209, "grad_norm": 0.32387396693229675, "learning_rate": 3.859155200326395e-05, "loss": 0.8011951446533203, "step": 5614 }, { "epoch": 6.889570552147239, "grad_norm": 0.30883580446243286, "learning_rate": 3.858733912726259e-05, "loss": 0.5903657674789429, "step": 5615 }, { "epoch": 6.89079754601227, "grad_norm": 0.25775405764579773, "learning_rate": 3.858312570359435e-05, "loss": 0.7835707664489746, "step": 5616 }, { "epoch": 6.8920245398773, "grad_norm": 0.31865784525871277, "learning_rate": 3.8578911732429045e-05, "loss": 0.6112868785858154, "step": 5617 }, { "epoch": 6.8932515337423315, "grad_norm": 0.28757038712501526, "learning_rate": 3.857469721393655e-05, "loss": 0.8655029535293579, "step": 5618 }, { "epoch": 6.894478527607362, "grad_norm": 0.2677695155143738, "learning_rate": 3.857048214828672e-05, "loss": 0.9084411859512329, "step": 5619 }, { "epoch": 6.895705521472393, "grad_norm": 0.3476846218109131, "learning_rate": 3.856626653564945e-05, "loss": 0.8632991313934326, "step": 5620 }, { "epoch": 6.896932515337423, "grad_norm": 0.26916930079460144, "learning_rate": 3.856205037619468e-05, "loss": 0.5971664786338806, "step": 5621 }, { "epoch": 6.898159509202454, "grad_norm": 0.26548317074775696, "learning_rate": 3.8557833670092334e-05, "loss": 0.7395472526550293, "step": 5622 }, { "epoch": 6.899386503067484, "grad_norm": 0.2336520552635193, "learning_rate": 3.8553616417512386e-05, "loss": 0.6305131912231445, "step": 5623 }, { "epoch": 6.9006134969325155, "grad_norm": 0.26417502760887146, "learning_rate": 3.8549398618624816e-05, "loss": 0.6446887850761414, "step": 5624 }, { "epoch": 6.901840490797546, "grad_norm": 0.294486939907074, "learning_rate": 3.854518027359963e-05, "loss": 0.5778273940086365, "step": 5625 }, { "epoch": 6.903067484662577, "grad_norm": 0.3135111927986145, "learning_rate": 3.854096138260686e-05, "loss": 0.6921685934066772, "step": 5626 }, { "epoch": 6.904294478527607, "grad_norm": 0.3164289593696594, "learning_rate": 3.8536741945816544e-05, "loss": 0.7402302026748657, "step": 5627 }, { "epoch": 6.905521472392638, "grad_norm": 0.29940664768218994, "learning_rate": 3.853252196339878e-05, "loss": 0.7507275342941284, "step": 5628 }, { "epoch": 6.906748466257668, "grad_norm": 0.3084210157394409, "learning_rate": 3.852830143552365e-05, "loss": 0.5124939680099487, "step": 5629 }, { "epoch": 6.9079754601226995, "grad_norm": 0.27682071924209595, "learning_rate": 3.852408036236128e-05, "loss": 0.8271034955978394, "step": 5630 }, { "epoch": 6.90920245398773, "grad_norm": 0.26359131932258606, "learning_rate": 3.8519858744081793e-05, "loss": 0.696992039680481, "step": 5631 }, { "epoch": 6.910429447852761, "grad_norm": 0.29081788659095764, "learning_rate": 3.8515636580855364e-05, "loss": 0.6283696293830872, "step": 5632 }, { "epoch": 6.911656441717791, "grad_norm": 0.2803874611854553, "learning_rate": 3.851141387285217e-05, "loss": 0.7814971208572388, "step": 5633 }, { "epoch": 6.912883435582822, "grad_norm": 0.33801230788230896, "learning_rate": 3.8507190620242426e-05, "loss": 0.5299559831619263, "step": 5634 }, { "epoch": 6.914110429447852, "grad_norm": 0.2223854660987854, "learning_rate": 3.850296682319634e-05, "loss": 0.8451563119888306, "step": 5635 }, { "epoch": 6.9153374233128835, "grad_norm": 0.27583879232406616, "learning_rate": 3.849874248188418e-05, "loss": 0.6782079935073853, "step": 5636 }, { "epoch": 6.916564417177915, "grad_norm": 0.2436164766550064, "learning_rate": 3.849451759647621e-05, "loss": 0.9265624284744263, "step": 5637 }, { "epoch": 6.917791411042945, "grad_norm": 0.23460789024829865, "learning_rate": 3.849029216714272e-05, "loss": 0.7429943084716797, "step": 5638 }, { "epoch": 6.919018404907975, "grad_norm": 0.30973032116889954, "learning_rate": 3.8486066194054026e-05, "loss": 0.5846710205078125, "step": 5639 }, { "epoch": 6.920245398773006, "grad_norm": 0.2533928453922272, "learning_rate": 3.848183967738047e-05, "loss": 0.7977232933044434, "step": 5640 }, { "epoch": 6.921472392638037, "grad_norm": 0.25326475501060486, "learning_rate": 3.8477612617292416e-05, "loss": 0.68941730260849, "step": 5641 }, { "epoch": 6.9226993865030675, "grad_norm": 0.24770212173461914, "learning_rate": 3.847338501396023e-05, "loss": 0.8166533708572388, "step": 5642 }, { "epoch": 6.923926380368098, "grad_norm": 0.2551482915878296, "learning_rate": 3.846915686755432e-05, "loss": 0.8467992544174194, "step": 5643 }, { "epoch": 6.925153374233129, "grad_norm": 0.30291128158569336, "learning_rate": 3.8464928178245116e-05, "loss": 0.7521048784255981, "step": 5644 }, { "epoch": 6.92638036809816, "grad_norm": 0.19165650010108948, "learning_rate": 3.8460698946203054e-05, "loss": 0.645338237285614, "step": 5645 }, { "epoch": 6.92760736196319, "grad_norm": 0.30565837025642395, "learning_rate": 3.845646917159862e-05, "loss": 0.5637853145599365, "step": 5646 }, { "epoch": 6.92883435582822, "grad_norm": 0.26546162366867065, "learning_rate": 3.845223885460228e-05, "loss": 0.777478814125061, "step": 5647 }, { "epoch": 6.9300613496932515, "grad_norm": 0.31170305609703064, "learning_rate": 3.844800799538456e-05, "loss": 0.6975896954536438, "step": 5648 }, { "epoch": 6.931288343558283, "grad_norm": 0.2894129157066345, "learning_rate": 3.8443776594116e-05, "loss": 0.6224988102912903, "step": 5649 }, { "epoch": 6.932515337423313, "grad_norm": 0.25799593329429626, "learning_rate": 3.843954465096714e-05, "loss": 0.644985556602478, "step": 5650 }, { "epoch": 6.933742331288344, "grad_norm": 0.35385411977767944, "learning_rate": 3.8435312166108576e-05, "loss": 0.6279516816139221, "step": 5651 }, { "epoch": 6.934969325153374, "grad_norm": 0.217922180891037, "learning_rate": 3.843107913971089e-05, "loss": 0.8657538890838623, "step": 5652 }, { "epoch": 6.936196319018405, "grad_norm": 0.2602849304676056, "learning_rate": 3.842684557194471e-05, "loss": 0.651217520236969, "step": 5653 }, { "epoch": 6.9374233128834355, "grad_norm": 0.29914015531539917, "learning_rate": 3.8422611462980684e-05, "loss": 0.641997218132019, "step": 5654 }, { "epoch": 6.938650306748467, "grad_norm": 0.36712780594825745, "learning_rate": 3.8418376812989466e-05, "loss": 0.6326677799224854, "step": 5655 }, { "epoch": 6.939877300613497, "grad_norm": 0.26988881826400757, "learning_rate": 3.841414162214176e-05, "loss": 0.6478870511054993, "step": 5656 }, { "epoch": 6.941104294478528, "grad_norm": 0.21237793564796448, "learning_rate": 3.840990589060825e-05, "loss": 0.8116947412490845, "step": 5657 }, { "epoch": 6.942331288343558, "grad_norm": 0.31636467576026917, "learning_rate": 3.8405669618559693e-05, "loss": 0.5625678896903992, "step": 5658 }, { "epoch": 6.943558282208589, "grad_norm": 0.28737008571624756, "learning_rate": 3.840143280616683e-05, "loss": 0.7051722407341003, "step": 5659 }, { "epoch": 6.9447852760736195, "grad_norm": 0.2325039952993393, "learning_rate": 3.839719545360041e-05, "loss": 0.8335747718811035, "step": 5660 }, { "epoch": 6.946012269938651, "grad_norm": 0.2575431168079376, "learning_rate": 3.8392957561031275e-05, "loss": 0.84339439868927, "step": 5661 }, { "epoch": 6.947239263803681, "grad_norm": 0.2818765640258789, "learning_rate": 3.83887191286302e-05, "loss": 0.6742714643478394, "step": 5662 }, { "epoch": 6.948466257668712, "grad_norm": 0.2741274833679199, "learning_rate": 3.838448015656806e-05, "loss": 0.760460376739502, "step": 5663 }, { "epoch": 6.949693251533742, "grad_norm": 0.2864624857902527, "learning_rate": 3.838024064501569e-05, "loss": 0.6397788524627686, "step": 5664 }, { "epoch": 6.950920245398773, "grad_norm": 0.2470414638519287, "learning_rate": 3.8376000594143976e-05, "loss": 0.860629677772522, "step": 5665 }, { "epoch": 6.9521472392638035, "grad_norm": 0.25201618671417236, "learning_rate": 3.837176000412384e-05, "loss": 0.6771450042724609, "step": 5666 }, { "epoch": 6.953374233128835, "grad_norm": 0.24324803054332733, "learning_rate": 3.836751887512619e-05, "loss": 0.640735387802124, "step": 5667 }, { "epoch": 6.954601226993865, "grad_norm": 0.21761317551136017, "learning_rate": 3.836327720732198e-05, "loss": 0.7925719022750854, "step": 5668 }, { "epoch": 6.955828220858896, "grad_norm": 0.302652508020401, "learning_rate": 3.8359035000882174e-05, "loss": 0.6279807090759277, "step": 5669 }, { "epoch": 6.957055214723926, "grad_norm": 0.25184860825538635, "learning_rate": 3.8354792255977774e-05, "loss": 0.7912209033966064, "step": 5670 }, { "epoch": 6.958282208588957, "grad_norm": 0.3538777232170105, "learning_rate": 3.8350548972779775e-05, "loss": 0.6327269673347473, "step": 5671 }, { "epoch": 6.9595092024539875, "grad_norm": 0.2782999575138092, "learning_rate": 3.834630515145924e-05, "loss": 0.7411371469497681, "step": 5672 }, { "epoch": 6.960736196319019, "grad_norm": 0.299936443567276, "learning_rate": 3.834206079218721e-05, "loss": 0.6382342576980591, "step": 5673 }, { "epoch": 6.961963190184049, "grad_norm": 0.34012308716773987, "learning_rate": 3.833781589513474e-05, "loss": 0.5333859920501709, "step": 5674 }, { "epoch": 6.96319018404908, "grad_norm": 0.26332834362983704, "learning_rate": 3.833357046047298e-05, "loss": 0.819821298122406, "step": 5675 }, { "epoch": 6.96441717791411, "grad_norm": 0.26911982893943787, "learning_rate": 3.8329324488373005e-05, "loss": 0.942501425743103, "step": 5676 }, { "epoch": 6.965644171779141, "grad_norm": 0.31682124733924866, "learning_rate": 3.832507797900599e-05, "loss": 0.5676393508911133, "step": 5677 }, { "epoch": 6.9668711656441715, "grad_norm": 0.24142242968082428, "learning_rate": 3.8320830932543074e-05, "loss": 0.754204273223877, "step": 5678 }, { "epoch": 6.968098159509203, "grad_norm": 0.23667709529399872, "learning_rate": 3.8316583349155456e-05, "loss": 0.822529673576355, "step": 5679 }, { "epoch": 6.969325153374233, "grad_norm": 0.2419363409280777, "learning_rate": 3.831233522901435e-05, "loss": 0.65889573097229, "step": 5680 }, { "epoch": 6.970552147239264, "grad_norm": 0.29311689734458923, "learning_rate": 3.8308086572290975e-05, "loss": 0.6742404103279114, "step": 5681 }, { "epoch": 6.971779141104294, "grad_norm": 0.2983185052871704, "learning_rate": 3.8303837379156584e-05, "loss": 0.7814952731132507, "step": 5682 }, { "epoch": 6.973006134969325, "grad_norm": 0.33465057611465454, "learning_rate": 3.829958764978245e-05, "loss": 0.688302755355835, "step": 5683 }, { "epoch": 6.9742331288343555, "grad_norm": 0.305622398853302, "learning_rate": 3.829533738433987e-05, "loss": 0.7744097709655762, "step": 5684 }, { "epoch": 6.975460122699387, "grad_norm": 0.2614782452583313, "learning_rate": 3.8291086583000165e-05, "loss": 0.948888897895813, "step": 5685 }, { "epoch": 6.976687116564417, "grad_norm": 0.3074389398097992, "learning_rate": 3.828683524593467e-05, "loss": 0.8067672848701477, "step": 5686 }, { "epoch": 6.977914110429448, "grad_norm": 0.25044623017311096, "learning_rate": 3.828258337331474e-05, "loss": 0.702908992767334, "step": 5687 }, { "epoch": 6.979141104294478, "grad_norm": 0.28451743721961975, "learning_rate": 3.827833096531175e-05, "loss": 0.605992317199707, "step": 5688 }, { "epoch": 6.980368098159509, "grad_norm": 0.23464976251125336, "learning_rate": 3.827407802209712e-05, "loss": 0.8019529581069946, "step": 5689 }, { "epoch": 6.9815950920245395, "grad_norm": 0.21841105818748474, "learning_rate": 3.826982454384226e-05, "loss": 0.79256272315979, "step": 5690 }, { "epoch": 6.982822085889571, "grad_norm": 0.31273117661476135, "learning_rate": 3.8265570530718616e-05, "loss": 0.6261903643608093, "step": 5691 }, { "epoch": 6.984049079754601, "grad_norm": 0.2875233590602875, "learning_rate": 3.826131598289766e-05, "loss": 0.6392158269882202, "step": 5692 }, { "epoch": 6.985276073619632, "grad_norm": 0.2905116379261017, "learning_rate": 3.825706090055088e-05, "loss": 0.6533576846122742, "step": 5693 }, { "epoch": 6.986503067484662, "grad_norm": 0.2794059216976166, "learning_rate": 3.825280528384978e-05, "loss": 0.5899734497070312, "step": 5694 }, { "epoch": 6.987730061349693, "grad_norm": 0.2754972577095032, "learning_rate": 3.82485491329659e-05, "loss": 0.7364608645439148, "step": 5695 }, { "epoch": 6.9889570552147235, "grad_norm": 0.24846452474594116, "learning_rate": 3.8244292448070794e-05, "loss": 0.8166987895965576, "step": 5696 }, { "epoch": 6.990184049079755, "grad_norm": 0.26850712299346924, "learning_rate": 3.8240035229336036e-05, "loss": 0.7800847291946411, "step": 5697 }, { "epoch": 6.991411042944785, "grad_norm": 0.2824145555496216, "learning_rate": 3.823577747693321e-05, "loss": 0.6260711550712585, "step": 5698 }, { "epoch": 6.992638036809816, "grad_norm": 0.23629596829414368, "learning_rate": 3.8231519191033956e-05, "loss": 0.6823861598968506, "step": 5699 }, { "epoch": 6.993865030674847, "grad_norm": 0.2837231457233429, "learning_rate": 3.822726037180989e-05, "loss": 0.7341170310974121, "step": 5700 }, { "epoch": 6.995092024539877, "grad_norm": 0.29072460532188416, "learning_rate": 3.822300101943268e-05, "loss": 0.7663797736167908, "step": 5701 }, { "epoch": 6.9963190184049076, "grad_norm": 0.23086318373680115, "learning_rate": 3.8218741134074026e-05, "loss": 0.7900674939155579, "step": 5702 }, { "epoch": 6.997546012269939, "grad_norm": 0.28826993703842163, "learning_rate": 3.821448071590561e-05, "loss": 0.704505443572998, "step": 5703 }, { "epoch": 6.99877300613497, "grad_norm": 0.25748100876808167, "learning_rate": 3.821021976509917e-05, "loss": 0.8802182078361511, "step": 5704 }, { "epoch": 7.0, "grad_norm": 0.2996480464935303, "learning_rate": 3.820595828182644e-05, "loss": 0.5914949774742126, "step": 5705 }, { "epoch": 7.001226993865031, "grad_norm": 0.2919679880142212, "learning_rate": 3.82016962662592e-05, "loss": 0.5880135893821716, "step": 5706 }, { "epoch": 7.002453987730061, "grad_norm": 0.2121526151895523, "learning_rate": 3.8197433718569245e-05, "loss": 0.7748396396636963, "step": 5707 }, { "epoch": 7.0036809815950924, "grad_norm": 0.30288249254226685, "learning_rate": 3.819317063892836e-05, "loss": 0.7896400094032288, "step": 5708 }, { "epoch": 7.004907975460123, "grad_norm": 0.26643607020378113, "learning_rate": 3.818890702750841e-05, "loss": 0.5482116937637329, "step": 5709 }, { "epoch": 7.006134969325154, "grad_norm": 0.262424111366272, "learning_rate": 3.8184642884481224e-05, "loss": 0.6494621634483337, "step": 5710 }, { "epoch": 7.007361963190184, "grad_norm": 0.23626065254211426, "learning_rate": 3.818037821001869e-05, "loss": 0.6529302597045898, "step": 5711 }, { "epoch": 7.008588957055215, "grad_norm": 0.26063403487205505, "learning_rate": 3.81761130042927e-05, "loss": 0.6769281625747681, "step": 5712 }, { "epoch": 7.009815950920245, "grad_norm": 0.32829055190086365, "learning_rate": 3.817184726747518e-05, "loss": 0.6162611842155457, "step": 5713 }, { "epoch": 7.0110429447852765, "grad_norm": 0.23501518368721008, "learning_rate": 3.816758099973806e-05, "loss": 0.6401578187942505, "step": 5714 }, { "epoch": 7.012269938650307, "grad_norm": 0.22511544823646545, "learning_rate": 3.8163314201253306e-05, "loss": 0.8815348148345947, "step": 5715 }, { "epoch": 7.013496932515338, "grad_norm": 0.29899340867996216, "learning_rate": 3.815904687219289e-05, "loss": 0.5118511915206909, "step": 5716 }, { "epoch": 7.014723926380368, "grad_norm": 0.3392999470233917, "learning_rate": 3.815477901272884e-05, "loss": 0.50223708152771, "step": 5717 }, { "epoch": 7.015950920245399, "grad_norm": 0.27232038974761963, "learning_rate": 3.815051062303317e-05, "loss": 0.5593956112861633, "step": 5718 }, { "epoch": 7.017177914110429, "grad_norm": 0.2713596820831299, "learning_rate": 3.8146241703277907e-05, "loss": 0.6931100487709045, "step": 5719 }, { "epoch": 7.0184049079754605, "grad_norm": 0.24310515820980072, "learning_rate": 3.814197225363514e-05, "loss": 0.7357580661773682, "step": 5720 }, { "epoch": 7.019631901840491, "grad_norm": 0.26074740290641785, "learning_rate": 3.813770227427696e-05, "loss": 0.8105883598327637, "step": 5721 }, { "epoch": 7.020858895705522, "grad_norm": 0.28806519508361816, "learning_rate": 3.813343176537546e-05, "loss": 0.7499642372131348, "step": 5722 }, { "epoch": 7.022085889570552, "grad_norm": 0.23790492117404938, "learning_rate": 3.812916072710279e-05, "loss": 0.6715476512908936, "step": 5723 }, { "epoch": 7.023312883435583, "grad_norm": 0.26615819334983826, "learning_rate": 3.8124889159631106e-05, "loss": 0.6469918489456177, "step": 5724 }, { "epoch": 7.024539877300613, "grad_norm": 0.3122289180755615, "learning_rate": 3.812061706313256e-05, "loss": 0.5356177091598511, "step": 5725 }, { "epoch": 7.0257668711656445, "grad_norm": 0.29361692070961, "learning_rate": 3.811634443777936e-05, "loss": 0.7587743997573853, "step": 5726 }, { "epoch": 7.026993865030675, "grad_norm": 0.22588370740413666, "learning_rate": 3.8112071283743736e-05, "loss": 0.6501748561859131, "step": 5727 }, { "epoch": 7.028220858895706, "grad_norm": 0.28082147240638733, "learning_rate": 3.810779760119791e-05, "loss": 0.5729328393936157, "step": 5728 }, { "epoch": 7.029447852760736, "grad_norm": 0.32679417729377747, "learning_rate": 3.8103523390314146e-05, "loss": 0.7469659447669983, "step": 5729 }, { "epoch": 7.030674846625767, "grad_norm": 0.278317391872406, "learning_rate": 3.809924865126472e-05, "loss": 0.7636211514472961, "step": 5730 }, { "epoch": 7.031901840490797, "grad_norm": 0.26511240005493164, "learning_rate": 3.809497338422196e-05, "loss": 0.673694908618927, "step": 5731 }, { "epoch": 7.0331288343558285, "grad_norm": 0.3094984292984009, "learning_rate": 3.8090697589358155e-05, "loss": 0.5514969825744629, "step": 5732 }, { "epoch": 7.034355828220859, "grad_norm": 0.2300422191619873, "learning_rate": 3.8086421266845674e-05, "loss": 0.7268428802490234, "step": 5733 }, { "epoch": 7.03558282208589, "grad_norm": 0.24021252989768982, "learning_rate": 3.808214441685687e-05, "loss": 0.7193339467048645, "step": 5734 }, { "epoch": 7.03680981595092, "grad_norm": 0.31494513154029846, "learning_rate": 3.8077867039564136e-05, "loss": 0.616036593914032, "step": 5735 }, { "epoch": 7.038036809815951, "grad_norm": 0.19239456951618195, "learning_rate": 3.807358913513989e-05, "loss": 0.901745080947876, "step": 5736 }, { "epoch": 7.039263803680981, "grad_norm": 0.28304386138916016, "learning_rate": 3.806931070375654e-05, "loss": 0.7294573783874512, "step": 5737 }, { "epoch": 7.0404907975460125, "grad_norm": 0.2349962443113327, "learning_rate": 3.8065031745586556e-05, "loss": 0.6922515630722046, "step": 5738 }, { "epoch": 7.041717791411043, "grad_norm": 0.24409744143486023, "learning_rate": 3.80607522608024e-05, "loss": 0.7446771860122681, "step": 5739 }, { "epoch": 7.042944785276074, "grad_norm": 0.29103758931159973, "learning_rate": 3.805647224957658e-05, "loss": 0.7963830828666687, "step": 5740 }, { "epoch": 7.044171779141104, "grad_norm": 0.2517506778240204, "learning_rate": 3.8052191712081595e-05, "loss": 0.7637534141540527, "step": 5741 }, { "epoch": 7.045398773006135, "grad_norm": 0.25786063075065613, "learning_rate": 3.8047910648489996e-05, "loss": 0.6606325507164001, "step": 5742 }, { "epoch": 7.046625766871165, "grad_norm": 0.23032106459140778, "learning_rate": 3.8043629058974326e-05, "loss": 0.8398311138153076, "step": 5743 }, { "epoch": 7.0478527607361965, "grad_norm": 0.2405754029750824, "learning_rate": 3.803934694370717e-05, "loss": 0.6649873852729797, "step": 5744 }, { "epoch": 7.049079754601227, "grad_norm": 0.27056556940078735, "learning_rate": 3.8035064302861136e-05, "loss": 0.46746736764907837, "step": 5745 }, { "epoch": 7.050306748466258, "grad_norm": 0.27358341217041016, "learning_rate": 3.8030781136608826e-05, "loss": 0.8908628225326538, "step": 5746 }, { "epoch": 7.051533742331288, "grad_norm": 0.2878827452659607, "learning_rate": 3.80264974451229e-05, "loss": 0.5642281174659729, "step": 5747 }, { "epoch": 7.052760736196319, "grad_norm": 0.22149722278118134, "learning_rate": 3.802221322857601e-05, "loss": 0.8696717023849487, "step": 5748 }, { "epoch": 7.053987730061349, "grad_norm": 0.2560853362083435, "learning_rate": 3.801792848714084e-05, "loss": 0.6797542572021484, "step": 5749 }, { "epoch": 7.0552147239263805, "grad_norm": 0.23710009455680847, "learning_rate": 3.801364322099011e-05, "loss": 0.6971901655197144, "step": 5750 }, { "epoch": 7.056441717791411, "grad_norm": 0.2609908878803253, "learning_rate": 3.8009357430296543e-05, "loss": 0.7587698698043823, "step": 5751 }, { "epoch": 7.057668711656442, "grad_norm": 0.22993150353431702, "learning_rate": 3.8005071115232877e-05, "loss": 0.7661615014076233, "step": 5752 }, { "epoch": 7.058895705521472, "grad_norm": 0.29747530817985535, "learning_rate": 3.800078427597188e-05, "loss": 0.6693513989448547, "step": 5753 }, { "epoch": 7.060122699386503, "grad_norm": 0.25368472933769226, "learning_rate": 3.799649691268635e-05, "loss": 0.7628533244132996, "step": 5754 }, { "epoch": 7.061349693251533, "grad_norm": 0.2375860959291458, "learning_rate": 3.7992209025549105e-05, "loss": 0.7795132398605347, "step": 5755 }, { "epoch": 7.0625766871165645, "grad_norm": 0.23330388963222504, "learning_rate": 3.7987920614732966e-05, "loss": 0.7202473878860474, "step": 5756 }, { "epoch": 7.063803680981595, "grad_norm": 0.26941362023353577, "learning_rate": 3.798363168041079e-05, "loss": 0.7361433506011963, "step": 5757 }, { "epoch": 7.065030674846626, "grad_norm": 0.22689878940582275, "learning_rate": 3.797934222275544e-05, "loss": 0.6488667726516724, "step": 5758 }, { "epoch": 7.066257668711656, "grad_norm": 0.35055142641067505, "learning_rate": 3.7975052241939834e-05, "loss": 0.4250870645046234, "step": 5759 }, { "epoch": 7.067484662576687, "grad_norm": 0.24476021528244019, "learning_rate": 3.797076173813688e-05, "loss": 0.9305025935173035, "step": 5760 }, { "epoch": 7.068711656441717, "grad_norm": 0.22748532891273499, "learning_rate": 3.796647071151951e-05, "loss": 0.7705156803131104, "step": 5761 }, { "epoch": 7.0699386503067485, "grad_norm": 0.25074633955955505, "learning_rate": 3.7962179162260684e-05, "loss": 0.705619215965271, "step": 5762 }, { "epoch": 7.071165644171779, "grad_norm": 0.23899589478969574, "learning_rate": 3.795788709053339e-05, "loss": 0.7565038800239563, "step": 5763 }, { "epoch": 7.07239263803681, "grad_norm": 0.2451706975698471, "learning_rate": 3.795359449651063e-05, "loss": 0.7104392051696777, "step": 5764 }, { "epoch": 7.07361963190184, "grad_norm": 0.2002902328968048, "learning_rate": 3.794930138036541e-05, "loss": 0.849031925201416, "step": 5765 }, { "epoch": 7.074846625766871, "grad_norm": 0.2530459463596344, "learning_rate": 3.794500774227079e-05, "loss": 0.7399542331695557, "step": 5766 }, { "epoch": 7.076073619631902, "grad_norm": 0.2509486973285675, "learning_rate": 3.794071358239982e-05, "loss": 0.6328683495521545, "step": 5767 }, { "epoch": 7.0773006134969325, "grad_norm": 0.24931874871253967, "learning_rate": 3.79364189009256e-05, "loss": 0.7342414855957031, "step": 5768 }, { "epoch": 7.078527607361964, "grad_norm": 0.264757364988327, "learning_rate": 3.7932123698021236e-05, "loss": 0.6104170680046082, "step": 5769 }, { "epoch": 7.079754601226994, "grad_norm": 0.2629513442516327, "learning_rate": 3.792782797385984e-05, "loss": 0.5299086570739746, "step": 5770 }, { "epoch": 7.080981595092025, "grad_norm": 0.24527527391910553, "learning_rate": 3.792353172861457e-05, "loss": 0.6505035161972046, "step": 5771 }, { "epoch": 7.082208588957055, "grad_norm": 0.2238888144493103, "learning_rate": 3.7919234962458595e-05, "loss": 0.6836763620376587, "step": 5772 }, { "epoch": 7.083435582822086, "grad_norm": 0.3290877938270569, "learning_rate": 3.791493767556511e-05, "loss": 0.6710451245307922, "step": 5773 }, { "epoch": 7.0846625766871165, "grad_norm": 0.28782328963279724, "learning_rate": 3.7910639868107326e-05, "loss": 0.679222822189331, "step": 5774 }, { "epoch": 7.085889570552148, "grad_norm": 0.33662673830986023, "learning_rate": 3.790634154025846e-05, "loss": 0.5346361398696899, "step": 5775 }, { "epoch": 7.087116564417178, "grad_norm": 0.24354194104671478, "learning_rate": 3.790204269219178e-05, "loss": 0.8812075257301331, "step": 5776 }, { "epoch": 7.088343558282209, "grad_norm": 0.35864752531051636, "learning_rate": 3.789774332408056e-05, "loss": 0.37346071004867554, "step": 5777 }, { "epoch": 7.089570552147239, "grad_norm": 0.26326292753219604, "learning_rate": 3.7893443436098094e-05, "loss": 0.6792017221450806, "step": 5778 }, { "epoch": 7.09079754601227, "grad_norm": 0.2607342600822449, "learning_rate": 3.78891430284177e-05, "loss": 0.627822756767273, "step": 5779 }, { "epoch": 7.0920245398773005, "grad_norm": 0.22066715359687805, "learning_rate": 3.7884842101212694e-05, "loss": 0.7406418323516846, "step": 5780 }, { "epoch": 7.093251533742332, "grad_norm": 0.21000456809997559, "learning_rate": 3.788054065465646e-05, "loss": 0.8975024223327637, "step": 5781 }, { "epoch": 7.094478527607362, "grad_norm": 0.21854786574840546, "learning_rate": 3.7876238688922375e-05, "loss": 0.6862730383872986, "step": 5782 }, { "epoch": 7.095705521472393, "grad_norm": 0.251503586769104, "learning_rate": 3.787193620418382e-05, "loss": 0.5762127637863159, "step": 5783 }, { "epoch": 7.096932515337423, "grad_norm": 0.24671107530593872, "learning_rate": 3.7867633200614244e-05, "loss": 0.8458935022354126, "step": 5784 }, { "epoch": 7.098159509202454, "grad_norm": 0.23950673639774323, "learning_rate": 3.786332967838706e-05, "loss": 0.7848527431488037, "step": 5785 }, { "epoch": 7.0993865030674845, "grad_norm": 0.20427197217941284, "learning_rate": 3.785902563767576e-05, "loss": 0.6811695694923401, "step": 5786 }, { "epoch": 7.100613496932516, "grad_norm": 0.23556454479694366, "learning_rate": 3.78547210786538e-05, "loss": 0.7113250494003296, "step": 5787 }, { "epoch": 7.101840490797546, "grad_norm": 0.29113155603408813, "learning_rate": 3.785041600149469e-05, "loss": 0.7049829959869385, "step": 5788 }, { "epoch": 7.103067484662577, "grad_norm": 0.35703834891319275, "learning_rate": 3.7846110406371975e-05, "loss": 0.5531357526779175, "step": 5789 }, { "epoch": 7.104294478527607, "grad_norm": 0.2648620307445526, "learning_rate": 3.7841804293459184e-05, "loss": 0.5703718066215515, "step": 5790 }, { "epoch": 7.105521472392638, "grad_norm": 0.39179351925849915, "learning_rate": 3.7837497662929886e-05, "loss": 0.42896246910095215, "step": 5791 }, { "epoch": 7.1067484662576685, "grad_norm": 0.222457617521286, "learning_rate": 3.783319051495767e-05, "loss": 0.8804888129234314, "step": 5792 }, { "epoch": 7.1079754601227, "grad_norm": 0.24094264209270477, "learning_rate": 3.7828882849716155e-05, "loss": 0.8364083766937256, "step": 5793 }, { "epoch": 7.10920245398773, "grad_norm": 0.23830777406692505, "learning_rate": 3.7824574667378946e-05, "loss": 0.9345521926879883, "step": 5794 }, { "epoch": 7.110429447852761, "grad_norm": 0.2724105715751648, "learning_rate": 3.7820265968119714e-05, "loss": 0.7406518459320068, "step": 5795 }, { "epoch": 7.111656441717791, "grad_norm": 0.2105710357427597, "learning_rate": 3.781595675211213e-05, "loss": 0.9350991249084473, "step": 5796 }, { "epoch": 7.112883435582822, "grad_norm": 0.3304063379764557, "learning_rate": 3.781164701952988e-05, "loss": 0.5292978286743164, "step": 5797 }, { "epoch": 7.1141104294478525, "grad_norm": 0.24429793655872345, "learning_rate": 3.780733677054668e-05, "loss": 0.7349046468734741, "step": 5798 }, { "epoch": 7.115337423312884, "grad_norm": 0.2298005223274231, "learning_rate": 3.780302600533626e-05, "loss": 0.8260232210159302, "step": 5799 }, { "epoch": 7.116564417177914, "grad_norm": 0.24016407132148743, "learning_rate": 3.779871472407237e-05, "loss": 0.754271388053894, "step": 5800 }, { "epoch": 7.117791411042945, "grad_norm": 0.2789418399333954, "learning_rate": 3.7794402926928806e-05, "loss": 0.5984678268432617, "step": 5801 }, { "epoch": 7.119018404907975, "grad_norm": 0.2318163961172104, "learning_rate": 3.7790090614079346e-05, "loss": 0.9231932163238525, "step": 5802 }, { "epoch": 7.120245398773006, "grad_norm": 0.23536492884159088, "learning_rate": 3.778577778569781e-05, "loss": 0.7954349517822266, "step": 5803 }, { "epoch": 7.1214723926380366, "grad_norm": 0.2877279222011566, "learning_rate": 3.778146444195804e-05, "loss": 0.6347666382789612, "step": 5804 }, { "epoch": 7.122699386503068, "grad_norm": 0.3356308937072754, "learning_rate": 3.777715058303389e-05, "loss": 0.6488298177719116, "step": 5805 }, { "epoch": 7.123926380368098, "grad_norm": 0.25291574001312256, "learning_rate": 3.777283620909924e-05, "loss": 0.6567637920379639, "step": 5806 }, { "epoch": 7.125153374233129, "grad_norm": 0.2748711407184601, "learning_rate": 3.7768521320327986e-05, "loss": 0.7464680671691895, "step": 5807 }, { "epoch": 7.126380368098159, "grad_norm": 0.23969832062721252, "learning_rate": 3.776420591689407e-05, "loss": 0.8467729091644287, "step": 5808 }, { "epoch": 7.12760736196319, "grad_norm": 0.2590467035770416, "learning_rate": 3.775988999897141e-05, "loss": 0.7267615795135498, "step": 5809 }, { "epoch": 7.128834355828221, "grad_norm": 0.25393110513687134, "learning_rate": 3.7755573566733984e-05, "loss": 0.6633514761924744, "step": 5810 }, { "epoch": 7.130061349693252, "grad_norm": 0.24692746996879578, "learning_rate": 3.7751256620355755e-05, "loss": 0.6324528455734253, "step": 5811 }, { "epoch": 7.131288343558282, "grad_norm": 0.20168974995613098, "learning_rate": 3.774693916001074e-05, "loss": 0.7789791226387024, "step": 5812 }, { "epoch": 7.132515337423313, "grad_norm": 0.23001807928085327, "learning_rate": 3.7742621185872975e-05, "loss": 0.7328105568885803, "step": 5813 }, { "epoch": 7.133742331288343, "grad_norm": 0.29993581771850586, "learning_rate": 3.773830269811648e-05, "loss": 0.5843913555145264, "step": 5814 }, { "epoch": 7.134969325153374, "grad_norm": 0.23900999128818512, "learning_rate": 3.773398369691534e-05, "loss": 0.6551871299743652, "step": 5815 }, { "epoch": 7.136196319018405, "grad_norm": 0.25544577836990356, "learning_rate": 3.772966418244363e-05, "loss": 0.6643036603927612, "step": 5816 }, { "epoch": 7.137423312883436, "grad_norm": 0.3033428192138672, "learning_rate": 3.772534415487548e-05, "loss": 0.5810539126396179, "step": 5817 }, { "epoch": 7.138650306748466, "grad_norm": 0.23131698369979858, "learning_rate": 3.7721023614384984e-05, "loss": 0.7489285469055176, "step": 5818 }, { "epoch": 7.139877300613497, "grad_norm": 0.27078303694725037, "learning_rate": 3.7716702561146313e-05, "loss": 0.7659643292427063, "step": 5819 }, { "epoch": 7.141104294478527, "grad_norm": 0.23626533150672913, "learning_rate": 3.771238099533363e-05, "loss": 0.8648258447647095, "step": 5820 }, { "epoch": 7.142331288343558, "grad_norm": 0.2495194524526596, "learning_rate": 3.7708058917121126e-05, "loss": 0.8230189085006714, "step": 5821 }, { "epoch": 7.143558282208589, "grad_norm": 0.2273411899805069, "learning_rate": 3.770373632668302e-05, "loss": 0.5948481559753418, "step": 5822 }, { "epoch": 7.14478527607362, "grad_norm": 0.28671419620513916, "learning_rate": 3.769941322419352e-05, "loss": 0.6602520942687988, "step": 5823 }, { "epoch": 7.14601226993865, "grad_norm": 0.25202110409736633, "learning_rate": 3.76950896098269e-05, "loss": 0.5152085423469543, "step": 5824 }, { "epoch": 7.147239263803681, "grad_norm": 0.2667481005191803, "learning_rate": 3.769076548375743e-05, "loss": 0.6730095744132996, "step": 5825 }, { "epoch": 7.148466257668711, "grad_norm": 0.3171565532684326, "learning_rate": 3.768644084615939e-05, "loss": 0.6119099259376526, "step": 5826 }, { "epoch": 7.149693251533742, "grad_norm": 0.24488581717014313, "learning_rate": 3.768211569720711e-05, "loss": 0.7415280342102051, "step": 5827 }, { "epoch": 7.150920245398773, "grad_norm": 0.22485168278217316, "learning_rate": 3.76777900370749e-05, "loss": 0.624983549118042, "step": 5828 }, { "epoch": 7.152147239263804, "grad_norm": 0.2260986715555191, "learning_rate": 3.7673463865937154e-05, "loss": 0.7477884292602539, "step": 5829 }, { "epoch": 7.153374233128835, "grad_norm": 0.34273988008499146, "learning_rate": 3.766913718396822e-05, "loss": 0.5871412754058838, "step": 5830 }, { "epoch": 7.154601226993865, "grad_norm": 0.31845253705978394, "learning_rate": 3.766480999134249e-05, "loss": 0.6251031160354614, "step": 5831 }, { "epoch": 7.155828220858895, "grad_norm": 0.27867642045021057, "learning_rate": 3.76604822882344e-05, "loss": 0.7204362154006958, "step": 5832 }, { "epoch": 7.157055214723926, "grad_norm": 0.2987242341041565, "learning_rate": 3.7656154074818374e-05, "loss": 0.5048660039901733, "step": 5833 }, { "epoch": 7.1582822085889575, "grad_norm": 0.2961345911026001, "learning_rate": 3.7651825351268865e-05, "loss": 0.7541244029998779, "step": 5834 }, { "epoch": 7.159509202453988, "grad_norm": 0.2745179533958435, "learning_rate": 3.764749611776037e-05, "loss": 0.8138307332992554, "step": 5835 }, { "epoch": 7.160736196319019, "grad_norm": 0.28013256192207336, "learning_rate": 3.7643166374467384e-05, "loss": 0.5163321495056152, "step": 5836 }, { "epoch": 7.161963190184049, "grad_norm": 0.2734766900539398, "learning_rate": 3.7638836121564415e-05, "loss": 0.5868467092514038, "step": 5837 }, { "epoch": 7.16319018404908, "grad_norm": 0.2925722897052765, "learning_rate": 3.7634505359226e-05, "loss": 0.5290312170982361, "step": 5838 }, { "epoch": 7.16441717791411, "grad_norm": 0.3240354061126709, "learning_rate": 3.7630174087626724e-05, "loss": 0.6226696968078613, "step": 5839 }, { "epoch": 7.1656441717791415, "grad_norm": 0.23107457160949707, "learning_rate": 3.762584230694114e-05, "loss": 0.8175215125083923, "step": 5840 }, { "epoch": 7.166871165644172, "grad_norm": 0.3181336224079132, "learning_rate": 3.7621510017343867e-05, "loss": 0.5800699591636658, "step": 5841 }, { "epoch": 7.168098159509203, "grad_norm": 0.23317082226276398, "learning_rate": 3.761717721900953e-05, "loss": 0.6526495218276978, "step": 5842 }, { "epoch": 7.169325153374233, "grad_norm": 0.27449214458465576, "learning_rate": 3.761284391211276e-05, "loss": 0.7524580955505371, "step": 5843 }, { "epoch": 7.170552147239264, "grad_norm": 0.32016101479530334, "learning_rate": 3.7608510096828224e-05, "loss": 0.5652564764022827, "step": 5844 }, { "epoch": 7.171779141104294, "grad_norm": 0.24005430936813354, "learning_rate": 3.7604175773330605e-05, "loss": 0.772176206111908, "step": 5845 }, { "epoch": 7.1730061349693255, "grad_norm": 0.29743266105651855, "learning_rate": 3.7599840941794604e-05, "loss": 0.6592501401901245, "step": 5846 }, { "epoch": 7.174233128834356, "grad_norm": 0.23235607147216797, "learning_rate": 3.759550560239497e-05, "loss": 0.7977102994918823, "step": 5847 }, { "epoch": 7.175460122699387, "grad_norm": 0.2833864688873291, "learning_rate": 3.759116975530641e-05, "loss": 0.5154645442962646, "step": 5848 }, { "epoch": 7.176687116564417, "grad_norm": 0.2653583288192749, "learning_rate": 3.7586833400703716e-05, "loss": 0.7495278120040894, "step": 5849 }, { "epoch": 7.177914110429448, "grad_norm": 0.2389947772026062, "learning_rate": 3.758249653876166e-05, "loss": 0.6784563064575195, "step": 5850 }, { "epoch": 7.179141104294478, "grad_norm": 0.3487577438354492, "learning_rate": 3.757815916965506e-05, "loss": 0.5827918648719788, "step": 5851 }, { "epoch": 7.1803680981595095, "grad_norm": 0.2712430953979492, "learning_rate": 3.757382129355873e-05, "loss": 0.8272120952606201, "step": 5852 }, { "epoch": 7.18159509202454, "grad_norm": 0.2821471691131592, "learning_rate": 3.756948291064754e-05, "loss": 0.8547326326370239, "step": 5853 }, { "epoch": 7.182822085889571, "grad_norm": 0.27239346504211426, "learning_rate": 3.7565144021096336e-05, "loss": 0.6163700819015503, "step": 5854 }, { "epoch": 7.184049079754601, "grad_norm": 0.25848549604415894, "learning_rate": 3.756080462508001e-05, "loss": 0.8287057876586914, "step": 5855 }, { "epoch": 7.185276073619632, "grad_norm": 0.29675227403640747, "learning_rate": 3.755646472277348e-05, "loss": 0.6069833040237427, "step": 5856 }, { "epoch": 7.186503067484662, "grad_norm": 0.2882864773273468, "learning_rate": 3.755212431435167e-05, "loss": 0.8176522254943848, "step": 5857 }, { "epoch": 7.1877300613496935, "grad_norm": 0.280961811542511, "learning_rate": 3.754778339998952e-05, "loss": 0.6142727136611938, "step": 5858 }, { "epoch": 7.188957055214724, "grad_norm": 0.25368785858154297, "learning_rate": 3.7543441979862014e-05, "loss": 0.7050896883010864, "step": 5859 }, { "epoch": 7.190184049079755, "grad_norm": 0.23087388277053833, "learning_rate": 3.753910005414413e-05, "loss": 0.6492612361907959, "step": 5860 }, { "epoch": 7.191411042944785, "grad_norm": 0.28014060854911804, "learning_rate": 3.7534757623010894e-05, "loss": 0.5398808717727661, "step": 5861 }, { "epoch": 7.192638036809816, "grad_norm": 0.22988241910934448, "learning_rate": 3.7530414686637325e-05, "loss": 0.8138746023178101, "step": 5862 }, { "epoch": 7.193865030674846, "grad_norm": 0.24552664160728455, "learning_rate": 3.752607124519848e-05, "loss": 0.7257039546966553, "step": 5863 }, { "epoch": 7.1950920245398775, "grad_norm": 0.2774299085140228, "learning_rate": 3.752172729886943e-05, "loss": 0.7795084714889526, "step": 5864 }, { "epoch": 7.196319018404908, "grad_norm": 0.24204446375370026, "learning_rate": 3.7517382847825253e-05, "loss": 0.7029809951782227, "step": 5865 }, { "epoch": 7.197546012269939, "grad_norm": 0.3036806881427765, "learning_rate": 3.7513037892241094e-05, "loss": 0.6967291235923767, "step": 5866 }, { "epoch": 7.198773006134969, "grad_norm": 0.3252890408039093, "learning_rate": 3.7508692432292054e-05, "loss": 0.6488743424415588, "step": 5867 }, { "epoch": 7.2, "grad_norm": 0.3054174780845642, "learning_rate": 3.7504346468153306e-05, "loss": 0.5202521085739136, "step": 5868 }, { "epoch": 7.20122699386503, "grad_norm": 0.2390710860490799, "learning_rate": 3.7500000000000003e-05, "loss": 0.9256323575973511, "step": 5869 }, { "epoch": 7.2024539877300615, "grad_norm": 0.2458024024963379, "learning_rate": 3.749565302800736e-05, "loss": 0.8570212125778198, "step": 5870 }, { "epoch": 7.203680981595092, "grad_norm": 0.24437925219535828, "learning_rate": 3.7491305552350576e-05, "loss": 0.6093120574951172, "step": 5871 }, { "epoch": 7.204907975460123, "grad_norm": 0.2111169993877411, "learning_rate": 3.748695757320489e-05, "loss": 0.8654959201812744, "step": 5872 }, { "epoch": 7.206134969325153, "grad_norm": 0.22209423780441284, "learning_rate": 3.748260909074557e-05, "loss": 0.8684577941894531, "step": 5873 }, { "epoch": 7.207361963190184, "grad_norm": 0.3126440644264221, "learning_rate": 3.747826010514787e-05, "loss": 0.5990093946456909, "step": 5874 }, { "epoch": 7.208588957055214, "grad_norm": 0.2408103495836258, "learning_rate": 3.74739106165871e-05, "loss": 0.6196020841598511, "step": 5875 }, { "epoch": 7.2098159509202455, "grad_norm": 0.41831067204475403, "learning_rate": 3.7469560625238564e-05, "loss": 0.5088105201721191, "step": 5876 }, { "epoch": 7.211042944785276, "grad_norm": 0.2668294310569763, "learning_rate": 3.746521013127761e-05, "loss": 0.8273571729660034, "step": 5877 }, { "epoch": 7.212269938650307, "grad_norm": 0.21992981433868408, "learning_rate": 3.746085913487958e-05, "loss": 0.7091010808944702, "step": 5878 }, { "epoch": 7.213496932515337, "grad_norm": 0.30943024158477783, "learning_rate": 3.745650763621986e-05, "loss": 0.646435022354126, "step": 5879 }, { "epoch": 7.214723926380368, "grad_norm": 0.22519436478614807, "learning_rate": 3.745215563547385e-05, "loss": 0.8957622051239014, "step": 5880 }, { "epoch": 7.215950920245398, "grad_norm": 0.255470335483551, "learning_rate": 3.7447803132816954e-05, "loss": 0.7656986117362976, "step": 5881 }, { "epoch": 7.2171779141104295, "grad_norm": 0.3409481644630432, "learning_rate": 3.744345012842461e-05, "loss": 0.4325711727142334, "step": 5882 }, { "epoch": 7.21840490797546, "grad_norm": 0.26288533210754395, "learning_rate": 3.7439096622472296e-05, "loss": 0.7362896203994751, "step": 5883 }, { "epoch": 7.219631901840491, "grad_norm": 0.2598109245300293, "learning_rate": 3.743474261513546e-05, "loss": 0.5266495943069458, "step": 5884 }, { "epoch": 7.220858895705521, "grad_norm": 0.30933377146720886, "learning_rate": 3.743038810658963e-05, "loss": 0.7741132974624634, "step": 5885 }, { "epoch": 7.222085889570552, "grad_norm": 0.26965847611427307, "learning_rate": 3.74260330970103e-05, "loss": 0.6951575875282288, "step": 5886 }, { "epoch": 7.223312883435582, "grad_norm": 0.3003225028514862, "learning_rate": 3.742167758657301e-05, "loss": 0.61228346824646, "step": 5887 }, { "epoch": 7.2245398773006135, "grad_norm": 0.2562004029750824, "learning_rate": 3.741732157545334e-05, "loss": 0.7349822521209717, "step": 5888 }, { "epoch": 7.225766871165644, "grad_norm": 0.21324940025806427, "learning_rate": 3.741296506382684e-05, "loss": 0.7769694924354553, "step": 5889 }, { "epoch": 7.226993865030675, "grad_norm": 0.2537837624549866, "learning_rate": 3.740860805186913e-05, "loss": 0.6608051061630249, "step": 5890 }, { "epoch": 7.228220858895705, "grad_norm": 0.24547941982746124, "learning_rate": 3.740425053975581e-05, "loss": 0.7941008806228638, "step": 5891 }, { "epoch": 7.229447852760736, "grad_norm": 0.2822403311729431, "learning_rate": 3.7399892527662534e-05, "loss": 0.7208014726638794, "step": 5892 }, { "epoch": 7.230674846625767, "grad_norm": 0.2174280285835266, "learning_rate": 3.7395534015764955e-05, "loss": 0.6450596451759338, "step": 5893 }, { "epoch": 7.2319018404907975, "grad_norm": 0.29205551743507385, "learning_rate": 3.739117500423875e-05, "loss": 0.6323642730712891, "step": 5894 }, { "epoch": 7.233128834355828, "grad_norm": 0.26782092452049255, "learning_rate": 3.738681549325962e-05, "loss": 0.7375587224960327, "step": 5895 }, { "epoch": 7.234355828220859, "grad_norm": 0.2150966376066208, "learning_rate": 3.738245548300329e-05, "loss": 0.8658962249755859, "step": 5896 }, { "epoch": 7.23558282208589, "grad_norm": 0.2616908550262451, "learning_rate": 3.73780949736455e-05, "loss": 0.7862038612365723, "step": 5897 }, { "epoch": 7.23680981595092, "grad_norm": 0.29672521352767944, "learning_rate": 3.7373733965362e-05, "loss": 0.5963200330734253, "step": 5898 }, { "epoch": 7.238036809815951, "grad_norm": 0.3113231658935547, "learning_rate": 3.736937245832857e-05, "loss": 0.6769604682922363, "step": 5899 }, { "epoch": 7.2392638036809815, "grad_norm": 0.2739061713218689, "learning_rate": 3.736501045272103e-05, "loss": 0.7149029970169067, "step": 5900 }, { "epoch": 7.240490797546013, "grad_norm": 0.2423282116651535, "learning_rate": 3.7360647948715164e-05, "loss": 0.7504695653915405, "step": 5901 }, { "epoch": 7.241717791411043, "grad_norm": 0.27253440022468567, "learning_rate": 3.735628494648685e-05, "loss": 0.7267919778823853, "step": 5902 }, { "epoch": 7.242944785276074, "grad_norm": 0.25043612718582153, "learning_rate": 3.735192144621192e-05, "loss": 0.8606990575790405, "step": 5903 }, { "epoch": 7.244171779141104, "grad_norm": 0.37484002113342285, "learning_rate": 3.734755744806627e-05, "loss": 0.6550102829933167, "step": 5904 }, { "epoch": 7.245398773006135, "grad_norm": 0.22693966329097748, "learning_rate": 3.7343192952225796e-05, "loss": 0.8993436694145203, "step": 5905 }, { "epoch": 7.2466257668711656, "grad_norm": 0.3265964686870575, "learning_rate": 3.733882795886642e-05, "loss": 0.4192725419998169, "step": 5906 }, { "epoch": 7.247852760736197, "grad_norm": 0.27467337250709534, "learning_rate": 3.733446246816408e-05, "loss": 0.7777267694473267, "step": 5907 }, { "epoch": 7.249079754601227, "grad_norm": 0.3332544267177582, "learning_rate": 3.733009648029473e-05, "loss": 0.7195776700973511, "step": 5908 }, { "epoch": 7.250306748466258, "grad_norm": 0.25211256742477417, "learning_rate": 3.7325729995434367e-05, "loss": 0.922743558883667, "step": 5909 }, { "epoch": 7.251533742331288, "grad_norm": 0.20971161127090454, "learning_rate": 3.7321363013758976e-05, "loss": 0.574110746383667, "step": 5910 }, { "epoch": 7.252760736196319, "grad_norm": 0.23152869939804077, "learning_rate": 3.731699553544458e-05, "loss": 0.8193444013595581, "step": 5911 }, { "epoch": 7.25398773006135, "grad_norm": 0.3226180672645569, "learning_rate": 3.7312627560667235e-05, "loss": 0.5275994539260864, "step": 5912 }, { "epoch": 7.255214723926381, "grad_norm": 0.29822275042533875, "learning_rate": 3.730825908960298e-05, "loss": 0.6227304935455322, "step": 5913 }, { "epoch": 7.256441717791411, "grad_norm": 0.21912619471549988, "learning_rate": 3.730389012242791e-05, "loss": 0.754523515701294, "step": 5914 }, { "epoch": 7.257668711656442, "grad_norm": 0.30813896656036377, "learning_rate": 3.729952065931812e-05, "loss": 0.6255925893783569, "step": 5915 }, { "epoch": 7.258895705521472, "grad_norm": 0.40655770897865295, "learning_rate": 3.729515070044972e-05, "loss": 0.6511353254318237, "step": 5916 }, { "epoch": 7.260122699386503, "grad_norm": 0.29114729166030884, "learning_rate": 3.729078024599888e-05, "loss": 0.7803131937980652, "step": 5917 }, { "epoch": 7.261349693251534, "grad_norm": 0.30798158049583435, "learning_rate": 3.7286409296141726e-05, "loss": 0.7163150310516357, "step": 5918 }, { "epoch": 7.262576687116565, "grad_norm": 0.20878012478351593, "learning_rate": 3.728203785105447e-05, "loss": 0.8203938007354736, "step": 5919 }, { "epoch": 7.263803680981595, "grad_norm": 0.24859201908111572, "learning_rate": 3.727766591091329e-05, "loss": 0.7496389150619507, "step": 5920 }, { "epoch": 7.265030674846626, "grad_norm": 0.2836188077926636, "learning_rate": 3.7273293475894425e-05, "loss": 0.6735619306564331, "step": 5921 }, { "epoch": 7.266257668711656, "grad_norm": 0.2915969491004944, "learning_rate": 3.726892054617409e-05, "loss": 0.7030874490737915, "step": 5922 }, { "epoch": 7.267484662576687, "grad_norm": 0.24283714592456818, "learning_rate": 3.726454712192856e-05, "loss": 0.6126163601875305, "step": 5923 }, { "epoch": 7.268711656441718, "grad_norm": 0.2773953080177307, "learning_rate": 3.726017320333413e-05, "loss": 0.7424055337905884, "step": 5924 }, { "epoch": 7.269938650306749, "grad_norm": 0.23654215037822723, "learning_rate": 3.725579879056707e-05, "loss": 0.5875113606452942, "step": 5925 }, { "epoch": 7.271165644171779, "grad_norm": 0.2761337459087372, "learning_rate": 3.725142388380373e-05, "loss": 0.6144498586654663, "step": 5926 }, { "epoch": 7.27239263803681, "grad_norm": 0.28724241256713867, "learning_rate": 3.724704848322041e-05, "loss": 0.8427271842956543, "step": 5927 }, { "epoch": 7.27361963190184, "grad_norm": 0.2322854995727539, "learning_rate": 3.724267258899352e-05, "loss": 0.6869851350784302, "step": 5928 }, { "epoch": 7.274846625766871, "grad_norm": 0.25085797905921936, "learning_rate": 3.723829620129941e-05, "loss": 0.6525393724441528, "step": 5929 }, { "epoch": 7.276073619631902, "grad_norm": 0.23465116322040558, "learning_rate": 3.723391932031448e-05, "loss": 0.8082728385925293, "step": 5930 }, { "epoch": 7.277300613496933, "grad_norm": 0.19480019807815552, "learning_rate": 3.722954194621516e-05, "loss": 0.8269927501678467, "step": 5931 }, { "epoch": 7.278527607361963, "grad_norm": 0.32265526056289673, "learning_rate": 3.7225164079177874e-05, "loss": 0.5555946826934814, "step": 5932 }, { "epoch": 7.279754601226994, "grad_norm": 0.323129802942276, "learning_rate": 3.72207857193791e-05, "loss": 0.6961036324501038, "step": 5933 }, { "epoch": 7.280981595092024, "grad_norm": 0.20527102053165436, "learning_rate": 3.7216406866995306e-05, "loss": 0.7404626607894897, "step": 5934 }, { "epoch": 7.282208588957055, "grad_norm": 0.3219456672668457, "learning_rate": 3.721202752220299e-05, "loss": 0.7041354179382324, "step": 5935 }, { "epoch": 7.283435582822086, "grad_norm": 0.2768450677394867, "learning_rate": 3.720764768517868e-05, "loss": 0.773711085319519, "step": 5936 }, { "epoch": 7.284662576687117, "grad_norm": 0.3001960813999176, "learning_rate": 3.7203267356098905e-05, "loss": 0.552647054195404, "step": 5937 }, { "epoch": 7.285889570552147, "grad_norm": 0.3531043529510498, "learning_rate": 3.719888653514023e-05, "loss": 0.39533740282058716, "step": 5938 }, { "epoch": 7.287116564417178, "grad_norm": 0.2841101884841919, "learning_rate": 3.719450522247923e-05, "loss": 0.48672372102737427, "step": 5939 }, { "epoch": 7.288343558282208, "grad_norm": 0.2752114236354828, "learning_rate": 3.719012341829251e-05, "loss": 0.6280807256698608, "step": 5940 }, { "epoch": 7.289570552147239, "grad_norm": 0.2431054264307022, "learning_rate": 3.7185741122756674e-05, "loss": 0.710776686668396, "step": 5941 }, { "epoch": 7.29079754601227, "grad_norm": 0.24383696913719177, "learning_rate": 3.718135833604837e-05, "loss": 0.7078604102134705, "step": 5942 }, { "epoch": 7.292024539877301, "grad_norm": 0.25038576126098633, "learning_rate": 3.717697505834426e-05, "loss": 0.8882955312728882, "step": 5943 }, { "epoch": 7.293251533742331, "grad_norm": 0.26133447885513306, "learning_rate": 3.717259128982101e-05, "loss": 0.5515918135643005, "step": 5944 }, { "epoch": 7.294478527607362, "grad_norm": 0.26410603523254395, "learning_rate": 3.716820703065532e-05, "loss": 0.7101797461509705, "step": 5945 }, { "epoch": 7.295705521472392, "grad_norm": 0.26916590332984924, "learning_rate": 3.7163822281023914e-05, "loss": 0.6919670104980469, "step": 5946 }, { "epoch": 7.296932515337423, "grad_norm": 0.2671968638896942, "learning_rate": 3.7159437041103524e-05, "loss": 0.5403258800506592, "step": 5947 }, { "epoch": 7.298159509202454, "grad_norm": 0.21495532989501953, "learning_rate": 3.715505131107091e-05, "loss": 0.9876974821090698, "step": 5948 }, { "epoch": 7.299386503067485, "grad_norm": 0.24087640643119812, "learning_rate": 3.715066509110283e-05, "loss": 0.7445148229598999, "step": 5949 }, { "epoch": 7.300613496932515, "grad_norm": 0.21084165573120117, "learning_rate": 3.714627838137611e-05, "loss": 0.7625389695167542, "step": 5950 }, { "epoch": 7.301840490797546, "grad_norm": 0.3103569746017456, "learning_rate": 3.714189118206754e-05, "loss": 0.5674145817756653, "step": 5951 }, { "epoch": 7.303067484662577, "grad_norm": 0.23410043120384216, "learning_rate": 3.713750349335398e-05, "loss": 0.75477135181427, "step": 5952 }, { "epoch": 7.304294478527607, "grad_norm": 0.26156359910964966, "learning_rate": 3.713311531541226e-05, "loss": 0.7253727912902832, "step": 5953 }, { "epoch": 7.305521472392638, "grad_norm": 0.23403404653072357, "learning_rate": 3.7128726648419266e-05, "loss": 0.777431845664978, "step": 5954 }, { "epoch": 7.306748466257669, "grad_norm": 0.24322709441184998, "learning_rate": 3.71243374925519e-05, "loss": 0.7763588428497314, "step": 5955 }, { "epoch": 7.3079754601227, "grad_norm": 0.3612135946750641, "learning_rate": 3.711994784798707e-05, "loss": 0.6295858025550842, "step": 5956 }, { "epoch": 7.30920245398773, "grad_norm": 0.26691916584968567, "learning_rate": 3.71155577149017e-05, "loss": 0.7627469301223755, "step": 5957 }, { "epoch": 7.31042944785276, "grad_norm": 0.230428546667099, "learning_rate": 3.7111167093472763e-05, "loss": 0.6883286237716675, "step": 5958 }, { "epoch": 7.311656441717791, "grad_norm": 0.3303476572036743, "learning_rate": 3.710677598387722e-05, "loss": 0.6533423662185669, "step": 5959 }, { "epoch": 7.3128834355828225, "grad_norm": 0.2536064684391022, "learning_rate": 3.710238438629206e-05, "loss": 0.7871478796005249, "step": 5960 }, { "epoch": 7.314110429447853, "grad_norm": 0.21601241827011108, "learning_rate": 3.70979923008943e-05, "loss": 0.6662642955780029, "step": 5961 }, { "epoch": 7.315337423312884, "grad_norm": 0.2826128602027893, "learning_rate": 3.709359972786099e-05, "loss": 0.7298769950866699, "step": 5962 }, { "epoch": 7.316564417177914, "grad_norm": 0.312913179397583, "learning_rate": 3.708920666736916e-05, "loss": 0.6861037611961365, "step": 5963 }, { "epoch": 7.317791411042945, "grad_norm": 0.2911546528339386, "learning_rate": 3.7084813119595885e-05, "loss": 0.7902883291244507, "step": 5964 }, { "epoch": 7.319018404907975, "grad_norm": 0.32869279384613037, "learning_rate": 3.708041908471827e-05, "loss": 0.5775120854377747, "step": 5965 }, { "epoch": 7.3202453987730065, "grad_norm": 0.274505615234375, "learning_rate": 3.7076024562913405e-05, "loss": 0.808906078338623, "step": 5966 }, { "epoch": 7.321472392638037, "grad_norm": 0.2778261601924896, "learning_rate": 3.707162955435844e-05, "loss": 0.8857842087745667, "step": 5967 }, { "epoch": 7.322699386503068, "grad_norm": 0.24506264925003052, "learning_rate": 3.706723405923051e-05, "loss": 0.6308045387268066, "step": 5968 }, { "epoch": 7.323926380368098, "grad_norm": 0.28829073905944824, "learning_rate": 3.706283807770679e-05, "loss": 0.6626943945884705, "step": 5969 }, { "epoch": 7.325153374233129, "grad_norm": 0.26006850600242615, "learning_rate": 3.705844160996448e-05, "loss": 0.6277354955673218, "step": 5970 }, { "epoch": 7.326380368098159, "grad_norm": 0.24264191091060638, "learning_rate": 3.705404465618077e-05, "loss": 0.8652329444885254, "step": 5971 }, { "epoch": 7.3276073619631905, "grad_norm": 0.29976677894592285, "learning_rate": 3.7049647216532914e-05, "loss": 0.591698944568634, "step": 5972 }, { "epoch": 7.328834355828221, "grad_norm": 0.23363077640533447, "learning_rate": 3.704524929119814e-05, "loss": 0.7591235637664795, "step": 5973 }, { "epoch": 7.330061349693252, "grad_norm": 0.27922847867012024, "learning_rate": 3.7040850880353715e-05, "loss": 0.693358302116394, "step": 5974 }, { "epoch": 7.331288343558282, "grad_norm": 0.26256492733955383, "learning_rate": 3.7036451984176936e-05, "loss": 0.7703017592430115, "step": 5975 }, { "epoch": 7.332515337423313, "grad_norm": 0.263408362865448, "learning_rate": 3.70320526028451e-05, "loss": 0.8561206459999084, "step": 5976 }, { "epoch": 7.333742331288343, "grad_norm": 0.26713982224464417, "learning_rate": 3.7027652736535554e-05, "loss": 0.8117473125457764, "step": 5977 }, { "epoch": 7.3349693251533745, "grad_norm": 0.3960604667663574, "learning_rate": 3.702325238542562e-05, "loss": 0.31759437918663025, "step": 5978 }, { "epoch": 7.336196319018405, "grad_norm": 0.22956615686416626, "learning_rate": 3.701885154969268e-05, "loss": 0.8323596715927124, "step": 5979 }, { "epoch": 7.337423312883436, "grad_norm": 0.2485274374485016, "learning_rate": 3.701445022951411e-05, "loss": 0.5699493885040283, "step": 5980 }, { "epoch": 7.338650306748466, "grad_norm": 0.3342575430870056, "learning_rate": 3.7010048425067314e-05, "loss": 0.6087357997894287, "step": 5981 }, { "epoch": 7.339877300613497, "grad_norm": 0.27690398693084717, "learning_rate": 3.700564613652973e-05, "loss": 0.7317876815795898, "step": 5982 }, { "epoch": 7.341104294478527, "grad_norm": 0.2317180335521698, "learning_rate": 3.7001243364078774e-05, "loss": 0.7560868263244629, "step": 5983 }, { "epoch": 7.3423312883435585, "grad_norm": 0.29037514328956604, "learning_rate": 3.699684010789194e-05, "loss": 0.5849393010139465, "step": 5984 }, { "epoch": 7.343558282208589, "grad_norm": 0.26977792382240295, "learning_rate": 3.6992436368146696e-05, "loss": 0.7554224729537964, "step": 5985 }, { "epoch": 7.34478527607362, "grad_norm": 0.22977042198181152, "learning_rate": 3.698803214502054e-05, "loss": 0.697186291217804, "step": 5986 }, { "epoch": 7.34601226993865, "grad_norm": 0.2820381820201874, "learning_rate": 3.6983627438691004e-05, "loss": 0.7523567080497742, "step": 5987 }, { "epoch": 7.347239263803681, "grad_norm": 0.29476290941238403, "learning_rate": 3.697922224933562e-05, "loss": 0.5832688808441162, "step": 5988 }, { "epoch": 7.348466257668711, "grad_norm": 0.20929601788520813, "learning_rate": 3.6974816577131964e-05, "loss": 0.6309667825698853, "step": 5989 }, { "epoch": 7.3496932515337425, "grad_norm": 0.3131815493106842, "learning_rate": 3.6970410422257595e-05, "loss": 0.608773946762085, "step": 5990 }, { "epoch": 7.350920245398773, "grad_norm": 0.2538700997829437, "learning_rate": 3.6966003784890124e-05, "loss": 0.7020589113235474, "step": 5991 }, { "epoch": 7.352147239263804, "grad_norm": 0.24229030311107635, "learning_rate": 3.696159666520717e-05, "loss": 0.6301773190498352, "step": 5992 }, { "epoch": 7.353374233128834, "grad_norm": 0.25026267766952515, "learning_rate": 3.6957189063386365e-05, "loss": 0.6516295075416565, "step": 5993 }, { "epoch": 7.354601226993865, "grad_norm": 0.265994668006897, "learning_rate": 3.6952780979605376e-05, "loss": 0.6903282403945923, "step": 5994 }, { "epoch": 7.355828220858895, "grad_norm": 0.24611693620681763, "learning_rate": 3.694837241404188e-05, "loss": 0.841045618057251, "step": 5995 }, { "epoch": 7.3570552147239265, "grad_norm": 0.2813248932361603, "learning_rate": 3.694396336687357e-05, "loss": 0.7072904109954834, "step": 5996 }, { "epoch": 7.358282208588957, "grad_norm": 0.2595456838607788, "learning_rate": 3.693955383827815e-05, "loss": 0.951714038848877, "step": 5997 }, { "epoch": 7.359509202453988, "grad_norm": 0.26549941301345825, "learning_rate": 3.693514382843338e-05, "loss": 0.6384869813919067, "step": 5998 }, { "epoch": 7.360736196319018, "grad_norm": 0.3199116587638855, "learning_rate": 3.6930733337517e-05, "loss": 0.6354997754096985, "step": 5999 }, { "epoch": 7.361963190184049, "grad_norm": 0.29144760966300964, "learning_rate": 3.692632236570678e-05, "loss": 0.6920925378799438, "step": 6000 }, { "epoch": 7.363190184049079, "grad_norm": 0.22988583147525787, "learning_rate": 3.6921910913180525e-05, "loss": 0.7725803852081299, "step": 6001 }, { "epoch": 7.3644171779141105, "grad_norm": 0.29000547528266907, "learning_rate": 3.691749898011604e-05, "loss": 0.7866990566253662, "step": 6002 }, { "epoch": 7.365644171779141, "grad_norm": 0.2980116009712219, "learning_rate": 3.691308656669117e-05, "loss": 0.6923649311065674, "step": 6003 }, { "epoch": 7.366871165644172, "grad_norm": 0.27338653802871704, "learning_rate": 3.690867367308375e-05, "loss": 0.8166097402572632, "step": 6004 }, { "epoch": 7.368098159509202, "grad_norm": 0.28506967425346375, "learning_rate": 3.690426029947165e-05, "loss": 0.6640734076499939, "step": 6005 }, { "epoch": 7.369325153374233, "grad_norm": 0.24410530924797058, "learning_rate": 3.6899846446032786e-05, "loss": 0.6561761498451233, "step": 6006 }, { "epoch": 7.370552147239263, "grad_norm": 0.28883591294288635, "learning_rate": 3.689543211294505e-05, "loss": 0.6732308864593506, "step": 6007 }, { "epoch": 7.3717791411042946, "grad_norm": 0.4129650294780731, "learning_rate": 3.689101730038637e-05, "loss": 0.4179384410381317, "step": 6008 }, { "epoch": 7.373006134969325, "grad_norm": 0.325752317905426, "learning_rate": 3.688660200853469e-05, "loss": 0.8559772372245789, "step": 6009 }, { "epoch": 7.374233128834356, "grad_norm": 0.2601349651813507, "learning_rate": 3.688218623756799e-05, "loss": 0.7794848680496216, "step": 6010 }, { "epoch": 7.375460122699386, "grad_norm": 0.34113356471061707, "learning_rate": 3.687776998766425e-05, "loss": 0.5673193335533142, "step": 6011 }, { "epoch": 7.376687116564417, "grad_norm": 0.2862967550754547, "learning_rate": 3.6873353259001476e-05, "loss": 0.8013805150985718, "step": 6012 }, { "epoch": 7.3779141104294474, "grad_norm": 0.2665300667285919, "learning_rate": 3.6868936051757705e-05, "loss": 0.591056764125824, "step": 6013 }, { "epoch": 7.379141104294479, "grad_norm": 0.3351050019264221, "learning_rate": 3.6864518366110965e-05, "loss": 0.5948877334594727, "step": 6014 }, { "epoch": 7.38036809815951, "grad_norm": 0.3604147136211395, "learning_rate": 3.686010020223933e-05, "loss": 0.6440649032592773, "step": 6015 }, { "epoch": 7.38159509202454, "grad_norm": 0.3332013189792633, "learning_rate": 3.6855681560320875e-05, "loss": 0.48669424653053284, "step": 6016 }, { "epoch": 7.38282208588957, "grad_norm": 0.2509443759918213, "learning_rate": 3.685126244053372e-05, "loss": 0.6766120195388794, "step": 6017 }, { "epoch": 7.384049079754601, "grad_norm": 0.17707842588424683, "learning_rate": 3.684684284305598e-05, "loss": 0.8617650866508484, "step": 6018 }, { "epoch": 7.385276073619632, "grad_norm": 0.30312520265579224, "learning_rate": 3.6842422768065785e-05, "loss": 0.6956269145011902, "step": 6019 }, { "epoch": 7.386503067484663, "grad_norm": 0.2757965624332428, "learning_rate": 3.6838002215741305e-05, "loss": 0.9521121382713318, "step": 6020 }, { "epoch": 7.387730061349693, "grad_norm": 0.29977595806121826, "learning_rate": 3.6833581186260716e-05, "loss": 0.6123598217964172, "step": 6021 }, { "epoch": 7.388957055214724, "grad_norm": 0.23854297399520874, "learning_rate": 3.682915967980223e-05, "loss": 0.860900342464447, "step": 6022 }, { "epoch": 7.390184049079755, "grad_norm": 0.2158062607049942, "learning_rate": 3.682473769654405e-05, "loss": 0.8777185082435608, "step": 6023 }, { "epoch": 7.391411042944785, "grad_norm": 0.24436187744140625, "learning_rate": 3.682031523666442e-05, "loss": 0.8098183274269104, "step": 6024 }, { "epoch": 7.392638036809816, "grad_norm": 0.35734662413597107, "learning_rate": 3.681589230034159e-05, "loss": 0.5958710312843323, "step": 6025 }, { "epoch": 7.393865030674847, "grad_norm": 0.21628470718860626, "learning_rate": 3.681146888775384e-05, "loss": 0.7908397912979126, "step": 6026 }, { "epoch": 7.395092024539878, "grad_norm": 0.29035693407058716, "learning_rate": 3.680704499907947e-05, "loss": 0.852215051651001, "step": 6027 }, { "epoch": 7.396319018404908, "grad_norm": 0.26431310176849365, "learning_rate": 3.6802620634496796e-05, "loss": 0.8446894884109497, "step": 6028 }, { "epoch": 7.397546012269939, "grad_norm": 0.2357725203037262, "learning_rate": 3.679819579418414e-05, "loss": 0.5992003083229065, "step": 6029 }, { "epoch": 7.398773006134969, "grad_norm": 0.2987845838069916, "learning_rate": 3.679377047831986e-05, "loss": 0.61667400598526, "step": 6030 }, { "epoch": 7.4, "grad_norm": 0.2823892831802368, "learning_rate": 3.678934468708233e-05, "loss": 0.7555280327796936, "step": 6031 }, { "epoch": 7.401226993865031, "grad_norm": 0.24891379475593567, "learning_rate": 3.678491842064995e-05, "loss": 0.6582583785057068, "step": 6032 }, { "epoch": 7.402453987730062, "grad_norm": 0.4403936564922333, "learning_rate": 3.678049167920111e-05, "loss": 0.7621873617172241, "step": 6033 }, { "epoch": 7.403680981595092, "grad_norm": 0.3321428894996643, "learning_rate": 3.677606446291424e-05, "loss": 0.6957041621208191, "step": 6034 }, { "epoch": 7.404907975460123, "grad_norm": 0.2725323438644409, "learning_rate": 3.677163677196782e-05, "loss": 0.8275620937347412, "step": 6035 }, { "epoch": 7.406134969325153, "grad_norm": 0.27202296257019043, "learning_rate": 3.676720860654028e-05, "loss": 0.7138993740081787, "step": 6036 }, { "epoch": 7.407361963190184, "grad_norm": 0.3024255633354187, "learning_rate": 3.6762779966810124e-05, "loss": 0.4729641079902649, "step": 6037 }, { "epoch": 7.408588957055215, "grad_norm": 0.2843673527240753, "learning_rate": 3.6758350852955856e-05, "loss": 0.7056938409805298, "step": 6038 }, { "epoch": 7.409815950920246, "grad_norm": 0.27114182710647583, "learning_rate": 3.6753921265156005e-05, "loss": 0.6332859396934509, "step": 6039 }, { "epoch": 7.411042944785276, "grad_norm": 0.28238654136657715, "learning_rate": 3.674949120358911e-05, "loss": 0.744927167892456, "step": 6040 }, { "epoch": 7.412269938650307, "grad_norm": 0.2855590283870697, "learning_rate": 3.674506066843374e-05, "loss": 0.6652286052703857, "step": 6041 }, { "epoch": 7.413496932515337, "grad_norm": 0.3397871255874634, "learning_rate": 3.674062965986847e-05, "loss": 0.6301367282867432, "step": 6042 }, { "epoch": 7.414723926380368, "grad_norm": 0.38263487815856934, "learning_rate": 3.67361981780719e-05, "loss": 0.4845014810562134, "step": 6043 }, { "epoch": 7.415950920245399, "grad_norm": 0.3010315001010895, "learning_rate": 3.673176622322265e-05, "loss": 0.6156700849533081, "step": 6044 }, { "epoch": 7.41717791411043, "grad_norm": 0.25290647149086, "learning_rate": 3.6727333795499375e-05, "loss": 0.6048744320869446, "step": 6045 }, { "epoch": 7.41840490797546, "grad_norm": 0.22444704174995422, "learning_rate": 3.6722900895080715e-05, "loss": 0.8787493705749512, "step": 6046 }, { "epoch": 7.419631901840491, "grad_norm": 0.25376954674720764, "learning_rate": 3.6718467522145357e-05, "loss": 0.763236403465271, "step": 6047 }, { "epoch": 7.420858895705521, "grad_norm": 0.2500990927219391, "learning_rate": 3.6714033676871995e-05, "loss": 0.7378139495849609, "step": 6048 }, { "epoch": 7.422085889570552, "grad_norm": 0.21956108510494232, "learning_rate": 3.670959935943934e-05, "loss": 0.6757520437240601, "step": 6049 }, { "epoch": 7.423312883435583, "grad_norm": 0.3013949394226074, "learning_rate": 3.670516457002613e-05, "loss": 0.6146975755691528, "step": 6050 }, { "epoch": 7.424539877300614, "grad_norm": 0.22737979888916016, "learning_rate": 3.670072930881113e-05, "loss": 0.7488701343536377, "step": 6051 }, { "epoch": 7.425766871165644, "grad_norm": 0.31561315059661865, "learning_rate": 3.66962935759731e-05, "loss": 0.7621356248855591, "step": 6052 }, { "epoch": 7.426993865030675, "grad_norm": 0.2992718815803528, "learning_rate": 3.669185737169083e-05, "loss": 0.5031852722167969, "step": 6053 }, { "epoch": 7.428220858895705, "grad_norm": 0.29102715849876404, "learning_rate": 3.668742069614314e-05, "loss": 0.7472278475761414, "step": 6054 }, { "epoch": 7.429447852760736, "grad_norm": 0.23206397891044617, "learning_rate": 3.668298354950886e-05, "loss": 0.6595354080200195, "step": 6055 }, { "epoch": 7.430674846625767, "grad_norm": 0.30690690875053406, "learning_rate": 3.6678545931966826e-05, "loss": 0.7381862998008728, "step": 6056 }, { "epoch": 7.431901840490798, "grad_norm": 0.2870936095714569, "learning_rate": 3.667410784369592e-05, "loss": 0.6478077173233032, "step": 6057 }, { "epoch": 7.433128834355828, "grad_norm": 0.2763238251209259, "learning_rate": 3.6669669284875016e-05, "loss": 0.7547552585601807, "step": 6058 }, { "epoch": 7.434355828220859, "grad_norm": 0.25717422366142273, "learning_rate": 3.666523025568303e-05, "loss": 0.7832391262054443, "step": 6059 }, { "epoch": 7.435582822085889, "grad_norm": 0.306364506483078, "learning_rate": 3.666079075629888e-05, "loss": 0.5971744060516357, "step": 6060 }, { "epoch": 7.43680981595092, "grad_norm": 0.30538663268089294, "learning_rate": 3.6656350786901516e-05, "loss": 0.6234532594680786, "step": 6061 }, { "epoch": 7.438036809815951, "grad_norm": 0.3999567925930023, "learning_rate": 3.665191034766989e-05, "loss": 0.3788191080093384, "step": 6062 }, { "epoch": 7.439263803680982, "grad_norm": 0.27893075346946716, "learning_rate": 3.6647469438782995e-05, "loss": 0.8048558235168457, "step": 6063 }, { "epoch": 7.440490797546012, "grad_norm": 0.3233790993690491, "learning_rate": 3.6643028060419834e-05, "loss": 0.3549251854419708, "step": 6064 }, { "epoch": 7.441717791411043, "grad_norm": 0.280805766582489, "learning_rate": 3.663858621275941e-05, "loss": 0.8194990158081055, "step": 6065 }, { "epoch": 7.442944785276073, "grad_norm": 0.25447291135787964, "learning_rate": 3.663414389598077e-05, "loss": 0.7474569082260132, "step": 6066 }, { "epoch": 7.444171779141104, "grad_norm": 0.2804884910583496, "learning_rate": 3.662970111026297e-05, "loss": 0.5988985300064087, "step": 6067 }, { "epoch": 7.445398773006135, "grad_norm": 0.20425479114055634, "learning_rate": 3.662525785578509e-05, "loss": 0.8100757598876953, "step": 6068 }, { "epoch": 7.446625766871166, "grad_norm": 0.22950159013271332, "learning_rate": 3.662081413272622e-05, "loss": 0.7478692531585693, "step": 6069 }, { "epoch": 7.447852760736196, "grad_norm": 0.24453523755073547, "learning_rate": 3.6616369941265484e-05, "loss": 0.820938229560852, "step": 6070 }, { "epoch": 7.449079754601227, "grad_norm": 0.38810786604881287, "learning_rate": 3.661192528158199e-05, "loss": 0.5968906283378601, "step": 6071 }, { "epoch": 7.450306748466257, "grad_norm": 0.2609988749027252, "learning_rate": 3.660748015385492e-05, "loss": 0.628275990486145, "step": 6072 }, { "epoch": 7.451533742331288, "grad_norm": 0.269966721534729, "learning_rate": 3.6603034558263425e-05, "loss": 0.7126877903938293, "step": 6073 }, { "epoch": 7.452760736196319, "grad_norm": 0.30554983019828796, "learning_rate": 3.65985884949867e-05, "loss": 0.6842658519744873, "step": 6074 }, { "epoch": 7.45398773006135, "grad_norm": 0.22020326554775238, "learning_rate": 3.659414196420395e-05, "loss": 0.8351356983184814, "step": 6075 }, { "epoch": 7.45521472392638, "grad_norm": 0.2979484498500824, "learning_rate": 3.658969496609442e-05, "loss": 0.5645744204521179, "step": 6076 }, { "epoch": 7.456441717791411, "grad_norm": 0.268608033657074, "learning_rate": 3.6585247500837326e-05, "loss": 0.6842474937438965, "step": 6077 }, { "epoch": 7.457668711656442, "grad_norm": 0.23101232945919037, "learning_rate": 3.6580799568611955e-05, "loss": 0.7065773010253906, "step": 6078 }, { "epoch": 7.458895705521472, "grad_norm": 0.24645572900772095, "learning_rate": 3.6576351169597575e-05, "loss": 0.8452670574188232, "step": 6079 }, { "epoch": 7.460122699386503, "grad_norm": 0.25939714908599854, "learning_rate": 3.65719023039735e-05, "loss": 0.6732742786407471, "step": 6080 }, { "epoch": 7.461349693251534, "grad_norm": 0.32462725043296814, "learning_rate": 3.656745297191906e-05, "loss": 0.6376693844795227, "step": 6081 }, { "epoch": 7.462576687116565, "grad_norm": 0.29365310072898865, "learning_rate": 3.656300317361356e-05, "loss": 0.6451166868209839, "step": 6082 }, { "epoch": 7.463803680981595, "grad_norm": 0.2697858512401581, "learning_rate": 3.655855290923639e-05, "loss": 0.7903419733047485, "step": 6083 }, { "epoch": 7.465030674846625, "grad_norm": 0.25576773285865784, "learning_rate": 3.655410217896692e-05, "loss": 0.6498997807502747, "step": 6084 }, { "epoch": 7.466257668711656, "grad_norm": 0.2655707895755768, "learning_rate": 3.6549650982984544e-05, "loss": 0.6090746521949768, "step": 6085 }, { "epoch": 7.4674846625766875, "grad_norm": 0.3805531859397888, "learning_rate": 3.654519932146868e-05, "loss": 0.6811898946762085, "step": 6086 }, { "epoch": 7.468711656441718, "grad_norm": 0.2509288191795349, "learning_rate": 3.6540747194598755e-05, "loss": 0.751589834690094, "step": 6087 }, { "epoch": 7.469938650306749, "grad_norm": 0.3661097288131714, "learning_rate": 3.6536294602554236e-05, "loss": 0.5885169506072998, "step": 6088 }, { "epoch": 7.471165644171779, "grad_norm": 0.5360947847366333, "learning_rate": 3.653184154551457e-05, "loss": 0.7429975867271423, "step": 6089 }, { "epoch": 7.47239263803681, "grad_norm": 0.24839535355567932, "learning_rate": 3.652738802365926e-05, "loss": 0.710459291934967, "step": 6090 }, { "epoch": 7.47361963190184, "grad_norm": 0.2651735246181488, "learning_rate": 3.6522934037167835e-05, "loss": 0.8008020520210266, "step": 6091 }, { "epoch": 7.4748466257668715, "grad_norm": 0.2534352242946625, "learning_rate": 3.651847958621978e-05, "loss": 0.5969338417053223, "step": 6092 }, { "epoch": 7.476073619631902, "grad_norm": 0.3169097900390625, "learning_rate": 3.651402467099468e-05, "loss": 0.6346070766448975, "step": 6093 }, { "epoch": 7.477300613496933, "grad_norm": 0.30490103363990784, "learning_rate": 3.650956929167208e-05, "loss": 0.7394209504127502, "step": 6094 }, { "epoch": 7.478527607361963, "grad_norm": 0.3764651417732239, "learning_rate": 3.650511344843157e-05, "loss": 0.5698794722557068, "step": 6095 }, { "epoch": 7.479754601226994, "grad_norm": 0.29740557074546814, "learning_rate": 3.6500657141452744e-05, "loss": 0.6781386733055115, "step": 6096 }, { "epoch": 7.480981595092024, "grad_norm": 0.3070203363895416, "learning_rate": 3.649620037091524e-05, "loss": 0.6004297733306885, "step": 6097 }, { "epoch": 7.4822085889570555, "grad_norm": 0.25894078612327576, "learning_rate": 3.649174313699868e-05, "loss": 0.6798580884933472, "step": 6098 }, { "epoch": 7.483435582822086, "grad_norm": 0.3004358410835266, "learning_rate": 3.648728543988273e-05, "loss": 0.5984582901000977, "step": 6099 }, { "epoch": 7.484662576687117, "grad_norm": 0.30173006653785706, "learning_rate": 3.648282727974707e-05, "loss": 0.5870213508605957, "step": 6100 }, { "epoch": 7.485889570552147, "grad_norm": 0.24716569483280182, "learning_rate": 3.647836865677139e-05, "loss": 0.8087776303291321, "step": 6101 }, { "epoch": 7.487116564417178, "grad_norm": 0.23704878985881805, "learning_rate": 3.64739095711354e-05, "loss": 0.8148353099822998, "step": 6102 }, { "epoch": 7.488343558282208, "grad_norm": 0.23975643515586853, "learning_rate": 3.6469450023018856e-05, "loss": 0.5296715497970581, "step": 6103 }, { "epoch": 7.4895705521472395, "grad_norm": 0.8121957182884216, "learning_rate": 3.646499001260148e-05, "loss": 0.8940144181251526, "step": 6104 }, { "epoch": 7.49079754601227, "grad_norm": 0.2685476839542389, "learning_rate": 3.646052954006306e-05, "loss": 0.5965921878814697, "step": 6105 }, { "epoch": 7.492024539877301, "grad_norm": 0.18933048844337463, "learning_rate": 3.645606860558338e-05, "loss": 0.8343008756637573, "step": 6106 }, { "epoch": 7.493251533742331, "grad_norm": 0.31432172656059265, "learning_rate": 3.645160720934225e-05, "loss": 0.5567734241485596, "step": 6107 }, { "epoch": 7.494478527607362, "grad_norm": 0.28642112016677856, "learning_rate": 3.644714535151949e-05, "loss": 0.7864371538162231, "step": 6108 }, { "epoch": 7.495705521472392, "grad_norm": 0.2660631835460663, "learning_rate": 3.644268303229495e-05, "loss": 0.7720934152603149, "step": 6109 }, { "epoch": 7.4969325153374236, "grad_norm": 0.28100526332855225, "learning_rate": 3.64382202518485e-05, "loss": 0.6718452572822571, "step": 6110 }, { "epoch": 7.498159509202454, "grad_norm": 0.24596001207828522, "learning_rate": 3.643375701036001e-05, "loss": 0.7164433002471924, "step": 6111 }, { "epoch": 7.499386503067485, "grad_norm": 0.32936063408851624, "learning_rate": 3.6429293308009384e-05, "loss": 0.6811212301254272, "step": 6112 }, { "epoch": 7.500613496932515, "grad_norm": 0.2890465259552002, "learning_rate": 3.642482914497654e-05, "loss": 0.5615640878677368, "step": 6113 }, { "epoch": 7.501840490797546, "grad_norm": 0.2833188772201538, "learning_rate": 3.642036452144142e-05, "loss": 0.7259912490844727, "step": 6114 }, { "epoch": 7.5030674846625764, "grad_norm": 0.3079555332660675, "learning_rate": 3.6415899437583985e-05, "loss": 0.6996129751205444, "step": 6115 }, { "epoch": 7.504294478527608, "grad_norm": 0.3083193898200989, "learning_rate": 3.641143389358419e-05, "loss": 0.6492962837219238, "step": 6116 }, { "epoch": 7.505521472392638, "grad_norm": 0.2344294786453247, "learning_rate": 3.6406967889622044e-05, "loss": 0.8197253942489624, "step": 6117 }, { "epoch": 7.506748466257669, "grad_norm": 0.37871235609054565, "learning_rate": 3.640250142587756e-05, "loss": 0.4220663905143738, "step": 6118 }, { "epoch": 7.507975460122699, "grad_norm": 0.355727881193161, "learning_rate": 3.639803450253077e-05, "loss": 0.43056970834732056, "step": 6119 }, { "epoch": 7.50920245398773, "grad_norm": 0.28075623512268066, "learning_rate": 3.6393567119761704e-05, "loss": 0.7575830221176147, "step": 6120 }, { "epoch": 7.5104294478527605, "grad_norm": 0.27233269810676575, "learning_rate": 3.6389099277750456e-05, "loss": 0.6372696161270142, "step": 6121 }, { "epoch": 7.511656441717792, "grad_norm": 0.27468010783195496, "learning_rate": 3.638463097667709e-05, "loss": 0.6945120096206665, "step": 6122 }, { "epoch": 7.512883435582822, "grad_norm": 0.3552080988883972, "learning_rate": 3.6380162216721725e-05, "loss": 0.42887210845947266, "step": 6123 }, { "epoch": 7.514110429447853, "grad_norm": 0.2590892016887665, "learning_rate": 3.637569299806448e-05, "loss": 0.6073267459869385, "step": 6124 }, { "epoch": 7.515337423312883, "grad_norm": 0.23477795720100403, "learning_rate": 3.637122332088549e-05, "loss": 0.8718153834342957, "step": 6125 }, { "epoch": 7.516564417177914, "grad_norm": 0.38381427526474, "learning_rate": 3.636675318536492e-05, "loss": 0.7400368452072144, "step": 6126 }, { "epoch": 7.5177914110429445, "grad_norm": 0.2537796199321747, "learning_rate": 3.6362282591682946e-05, "loss": 0.672264575958252, "step": 6127 }, { "epoch": 7.519018404907976, "grad_norm": 0.22185809910297394, "learning_rate": 3.6357811540019767e-05, "loss": 0.8445502519607544, "step": 6128 }, { "epoch": 7.520245398773006, "grad_norm": 0.29890045523643494, "learning_rate": 3.635334003055561e-05, "loss": 0.5119988918304443, "step": 6129 }, { "epoch": 7.521472392638037, "grad_norm": 0.3652447760105133, "learning_rate": 3.634886806347068e-05, "loss": 0.5935074090957642, "step": 6130 }, { "epoch": 7.522699386503067, "grad_norm": 0.24143730103969574, "learning_rate": 3.634439563894525e-05, "loss": 0.8266226649284363, "step": 6131 }, { "epoch": 7.523926380368098, "grad_norm": 0.28543388843536377, "learning_rate": 3.6339922757159595e-05, "loss": 0.6380560398101807, "step": 6132 }, { "epoch": 7.5251533742331285, "grad_norm": 0.33887943625450134, "learning_rate": 3.6335449418293985e-05, "loss": 0.5568749904632568, "step": 6133 }, { "epoch": 7.52638036809816, "grad_norm": 0.3428747355937958, "learning_rate": 3.633097562252875e-05, "loss": 0.7429124712944031, "step": 6134 }, { "epoch": 7.52760736196319, "grad_norm": 0.2190382182598114, "learning_rate": 3.63265013700442e-05, "loss": 0.6764097213745117, "step": 6135 }, { "epoch": 7.528834355828221, "grad_norm": 0.30470189452171326, "learning_rate": 3.6322026661020683e-05, "loss": 0.7238267660140991, "step": 6136 }, { "epoch": 7.530061349693252, "grad_norm": 0.2854800820350647, "learning_rate": 3.631755149563857e-05, "loss": 0.6839290261268616, "step": 6137 }, { "epoch": 7.531288343558282, "grad_norm": 0.2784600257873535, "learning_rate": 3.631307587407822e-05, "loss": 0.7450480461120605, "step": 6138 }, { "epoch": 7.5325153374233125, "grad_norm": 0.23938049376010895, "learning_rate": 3.630859979652006e-05, "loss": 0.8270431756973267, "step": 6139 }, { "epoch": 7.533742331288344, "grad_norm": 0.2681463062763214, "learning_rate": 3.630412326314449e-05, "loss": 0.5842453837394714, "step": 6140 }, { "epoch": 7.534969325153375, "grad_norm": 0.2727797031402588, "learning_rate": 3.629964627413196e-05, "loss": 0.5941382646560669, "step": 6141 }, { "epoch": 7.536196319018405, "grad_norm": 0.24643196165561676, "learning_rate": 3.629516882966291e-05, "loss": 0.8273763656616211, "step": 6142 }, { "epoch": 7.537423312883435, "grad_norm": 0.29398342967033386, "learning_rate": 3.629069092991781e-05, "loss": 0.6347485780715942, "step": 6143 }, { "epoch": 7.538650306748466, "grad_norm": 0.33962804079055786, "learning_rate": 3.6286212575077174e-05, "loss": 0.7431983947753906, "step": 6144 }, { "epoch": 7.539877300613497, "grad_norm": 0.21643880009651184, "learning_rate": 3.6281733765321494e-05, "loss": 0.8142527341842651, "step": 6145 }, { "epoch": 7.541104294478528, "grad_norm": 0.27397337555885315, "learning_rate": 3.627725450083131e-05, "loss": 0.7365082502365112, "step": 6146 }, { "epoch": 7.542331288343558, "grad_norm": 0.22208520770072937, "learning_rate": 3.627277478178715e-05, "loss": 0.8308805227279663, "step": 6147 }, { "epoch": 7.543558282208589, "grad_norm": 0.25168904662132263, "learning_rate": 3.626829460836959e-05, "loss": 0.6163747906684875, "step": 6148 }, { "epoch": 7.54478527607362, "grad_norm": 0.2586239278316498, "learning_rate": 3.6263813980759215e-05, "loss": 0.7134466767311096, "step": 6149 }, { "epoch": 7.54601226993865, "grad_norm": 0.29113340377807617, "learning_rate": 3.625933289913662e-05, "loss": 0.6169823408126831, "step": 6150 }, { "epoch": 7.5472392638036805, "grad_norm": 0.31094279885292053, "learning_rate": 3.625485136368244e-05, "loss": 0.6740410327911377, "step": 6151 }, { "epoch": 7.548466257668712, "grad_norm": 0.31239375472068787, "learning_rate": 3.6250369374577285e-05, "loss": 0.5612934231758118, "step": 6152 }, { "epoch": 7.549693251533743, "grad_norm": 0.29452386498451233, "learning_rate": 3.6245886932001844e-05, "loss": 0.6811646223068237, "step": 6153 }, { "epoch": 7.550920245398773, "grad_norm": 0.3225609362125397, "learning_rate": 3.624140403613677e-05, "loss": 0.6491939425468445, "step": 6154 }, { "epoch": 7.552147239263804, "grad_norm": 0.38020482659339905, "learning_rate": 3.6236920687162756e-05, "loss": 0.5144277215003967, "step": 6155 }, { "epoch": 7.553374233128834, "grad_norm": 0.2559461295604706, "learning_rate": 3.623243688526052e-05, "loss": 0.8446409702301025, "step": 6156 }, { "epoch": 7.554601226993865, "grad_norm": 0.30102071166038513, "learning_rate": 3.622795263061079e-05, "loss": 0.4627087712287903, "step": 6157 }, { "epoch": 7.555828220858896, "grad_norm": 0.28194570541381836, "learning_rate": 3.6223467923394315e-05, "loss": 0.7636958360671997, "step": 6158 }, { "epoch": 7.557055214723927, "grad_norm": 0.3289940059185028, "learning_rate": 3.621898276379185e-05, "loss": 0.5674400925636292, "step": 6159 }, { "epoch": 7.558282208588957, "grad_norm": 0.26457616686820984, "learning_rate": 3.6214497151984194e-05, "loss": 0.7456961870193481, "step": 6160 }, { "epoch": 7.559509202453988, "grad_norm": 0.2886819541454315, "learning_rate": 3.621001108815214e-05, "loss": 0.5932040214538574, "step": 6161 }, { "epoch": 7.560736196319018, "grad_norm": 0.22255969047546387, "learning_rate": 3.6205524572476504e-05, "loss": 0.7779186964035034, "step": 6162 }, { "epoch": 7.561963190184049, "grad_norm": 0.275009423494339, "learning_rate": 3.620103760513815e-05, "loss": 0.6962167024612427, "step": 6163 }, { "epoch": 7.56319018404908, "grad_norm": 0.2156738042831421, "learning_rate": 3.61965501863179e-05, "loss": 0.7866460084915161, "step": 6164 }, { "epoch": 7.564417177914111, "grad_norm": 0.3251688778400421, "learning_rate": 3.619206231619666e-05, "loss": 0.5963265299797058, "step": 6165 }, { "epoch": 7.565644171779141, "grad_norm": 0.24253317713737488, "learning_rate": 3.618757399495529e-05, "loss": 0.7726688385009766, "step": 6166 }, { "epoch": 7.566871165644172, "grad_norm": 0.23873016238212585, "learning_rate": 3.6183085222774735e-05, "loss": 0.6628189086914062, "step": 6167 }, { "epoch": 7.568098159509202, "grad_norm": 0.27638235688209534, "learning_rate": 3.617859599983591e-05, "loss": 0.5588964223861694, "step": 6168 }, { "epoch": 7.569325153374233, "grad_norm": 0.28005343675613403, "learning_rate": 3.6174106326319754e-05, "loss": 0.7733038663864136, "step": 6169 }, { "epoch": 7.570552147239264, "grad_norm": 0.30841127038002014, "learning_rate": 3.6169616202407255e-05, "loss": 0.5005664229393005, "step": 6170 }, { "epoch": 7.571779141104295, "grad_norm": 0.27631881833076477, "learning_rate": 3.616512562827936e-05, "loss": 0.8353374004364014, "step": 6171 }, { "epoch": 7.573006134969325, "grad_norm": 0.25851455330848694, "learning_rate": 3.616063460411712e-05, "loss": 0.6826456785202026, "step": 6172 }, { "epoch": 7.574233128834356, "grad_norm": 0.2726741433143616, "learning_rate": 3.615614313010152e-05, "loss": 0.6845589876174927, "step": 6173 }, { "epoch": 7.575460122699386, "grad_norm": 0.3144034445285797, "learning_rate": 3.61516512064136e-05, "loss": 0.5704929828643799, "step": 6174 }, { "epoch": 7.576687116564417, "grad_norm": 0.2164338231086731, "learning_rate": 3.614715883323443e-05, "loss": 0.7142053842544556, "step": 6175 }, { "epoch": 7.577914110429448, "grad_norm": 0.2492070347070694, "learning_rate": 3.614266601074508e-05, "loss": 0.6745789647102356, "step": 6176 }, { "epoch": 7.579141104294479, "grad_norm": 0.25631001591682434, "learning_rate": 3.613817273912666e-05, "loss": 0.7086209058761597, "step": 6177 }, { "epoch": 7.580368098159509, "grad_norm": 0.2842054069042206, "learning_rate": 3.6133679018560245e-05, "loss": 0.670740008354187, "step": 6178 }, { "epoch": 7.58159509202454, "grad_norm": 0.2645622491836548, "learning_rate": 3.612918484922699e-05, "loss": 0.9351195096969604, "step": 6179 }, { "epoch": 7.58282208588957, "grad_norm": 0.473416268825531, "learning_rate": 3.612469023130803e-05, "loss": 0.6703568696975708, "step": 6180 }, { "epoch": 7.584049079754601, "grad_norm": 0.2826102674007416, "learning_rate": 3.612019516498454e-05, "loss": 0.49276474118232727, "step": 6181 }, { "epoch": 7.585276073619632, "grad_norm": 0.29466381669044495, "learning_rate": 3.6115699650437706e-05, "loss": 0.7593944072723389, "step": 6182 }, { "epoch": 7.586503067484663, "grad_norm": 0.25806209444999695, "learning_rate": 3.6111203687848706e-05, "loss": 0.7485900521278381, "step": 6183 }, { "epoch": 7.587730061349693, "grad_norm": 0.29697179794311523, "learning_rate": 3.610670727739879e-05, "loss": 0.8201342821121216, "step": 6184 }, { "epoch": 7.588957055214724, "grad_norm": 0.3203318417072296, "learning_rate": 3.610221041926917e-05, "loss": 0.5735909342765808, "step": 6185 }, { "epoch": 7.590184049079754, "grad_norm": 0.2997604310512543, "learning_rate": 3.609771311364111e-05, "loss": 0.7937171459197998, "step": 6186 }, { "epoch": 7.591411042944785, "grad_norm": 0.3408471643924713, "learning_rate": 3.60932153606959e-05, "loss": 0.7592064142227173, "step": 6187 }, { "epoch": 7.592638036809816, "grad_norm": 0.26584509015083313, "learning_rate": 3.608871716061481e-05, "loss": 0.8863129615783691, "step": 6188 }, { "epoch": 7.593865030674847, "grad_norm": 0.21910858154296875, "learning_rate": 3.6084218513579154e-05, "loss": 0.8169558644294739, "step": 6189 }, { "epoch": 7.595092024539877, "grad_norm": 0.31188786029815674, "learning_rate": 3.6079719419770275e-05, "loss": 0.6914784908294678, "step": 6190 }, { "epoch": 7.596319018404908, "grad_norm": 0.26617947220802307, "learning_rate": 3.6075219879369496e-05, "loss": 0.7328115105628967, "step": 6191 }, { "epoch": 7.597546012269938, "grad_norm": 0.2508351802825928, "learning_rate": 3.607071989255819e-05, "loss": 0.685524582862854, "step": 6192 }, { "epoch": 7.598773006134969, "grad_norm": 0.28558585047721863, "learning_rate": 3.606621945951775e-05, "loss": 0.6918848752975464, "step": 6193 }, { "epoch": 7.6, "grad_norm": 0.27603691816329956, "learning_rate": 3.6061718580429554e-05, "loss": 0.6356184482574463, "step": 6194 }, { "epoch": 7.601226993865031, "grad_norm": 0.23052918910980225, "learning_rate": 3.6057217255475034e-05, "loss": 0.6017132997512817, "step": 6195 }, { "epoch": 7.602453987730061, "grad_norm": 0.3543953597545624, "learning_rate": 3.605271548483562e-05, "loss": 0.5206677913665771, "step": 6196 }, { "epoch": 7.603680981595092, "grad_norm": 0.312587171792984, "learning_rate": 3.6048213268692777e-05, "loss": 0.7187404036521912, "step": 6197 }, { "epoch": 7.604907975460122, "grad_norm": 0.2752440869808197, "learning_rate": 3.604371060722797e-05, "loss": 0.679713785648346, "step": 6198 }, { "epoch": 7.606134969325153, "grad_norm": 0.22908538579940796, "learning_rate": 3.6039207500622674e-05, "loss": 0.7992199659347534, "step": 6199 }, { "epoch": 7.6073619631901845, "grad_norm": 0.25759056210517883, "learning_rate": 3.6034703949058415e-05, "loss": 0.7004653215408325, "step": 6200 }, { "epoch": 7.608588957055215, "grad_norm": 0.24445606768131256, "learning_rate": 3.60301999527167e-05, "loss": 0.6968485116958618, "step": 6201 }, { "epoch": 7.609815950920245, "grad_norm": 0.27034294605255127, "learning_rate": 3.60256955117791e-05, "loss": 0.520902156829834, "step": 6202 }, { "epoch": 7.611042944785276, "grad_norm": 0.30790695548057556, "learning_rate": 3.602119062642716e-05, "loss": 0.7880120277404785, "step": 6203 }, { "epoch": 7.612269938650307, "grad_norm": 0.2765730321407318, "learning_rate": 3.6016685296842455e-05, "loss": 0.7318812608718872, "step": 6204 }, { "epoch": 7.613496932515337, "grad_norm": 0.28177115321159363, "learning_rate": 3.6012179523206576e-05, "loss": 0.6582011580467224, "step": 6205 }, { "epoch": 7.614723926380368, "grad_norm": 0.245488241314888, "learning_rate": 3.600767330570116e-05, "loss": 0.5829852819442749, "step": 6206 }, { "epoch": 7.615950920245399, "grad_norm": 0.2658447027206421, "learning_rate": 3.6003166644507825e-05, "loss": 0.7093533277511597, "step": 6207 }, { "epoch": 7.61717791411043, "grad_norm": 0.2104220688343048, "learning_rate": 3.599865953980822e-05, "loss": 0.7814711332321167, "step": 6208 }, { "epoch": 7.61840490797546, "grad_norm": 0.25049448013305664, "learning_rate": 3.599415199178403e-05, "loss": 0.7350504398345947, "step": 6209 }, { "epoch": 7.61963190184049, "grad_norm": 0.30421143770217896, "learning_rate": 3.598964400061692e-05, "loss": 0.7252524495124817, "step": 6210 }, { "epoch": 7.620858895705521, "grad_norm": 0.41509443521499634, "learning_rate": 3.5985135566488606e-05, "loss": 0.3799275755882263, "step": 6211 }, { "epoch": 7.6220858895705526, "grad_norm": 0.35769861936569214, "learning_rate": 3.598062668958081e-05, "loss": 0.5088828802108765, "step": 6212 }, { "epoch": 7.623312883435583, "grad_norm": 0.26634177565574646, "learning_rate": 3.597611737007527e-05, "loss": 0.6743721961975098, "step": 6213 }, { "epoch": 7.624539877300613, "grad_norm": 0.2636820673942566, "learning_rate": 3.597160760815374e-05, "loss": 0.7050537467002869, "step": 6214 }, { "epoch": 7.625766871165644, "grad_norm": 0.2760486304759979, "learning_rate": 3.5967097403998004e-05, "loss": 0.9533033967018127, "step": 6215 }, { "epoch": 7.626993865030675, "grad_norm": 0.22665497660636902, "learning_rate": 3.596258675778984e-05, "loss": 0.7433376312255859, "step": 6216 }, { "epoch": 7.6282208588957054, "grad_norm": 0.366129994392395, "learning_rate": 3.595807566971108e-05, "loss": 0.5905453562736511, "step": 6217 }, { "epoch": 7.629447852760737, "grad_norm": 0.21732363104820251, "learning_rate": 3.595356413994354e-05, "loss": 0.8707976341247559, "step": 6218 }, { "epoch": 7.630674846625767, "grad_norm": 0.20425014197826385, "learning_rate": 3.5949052168669074e-05, "loss": 0.7546035051345825, "step": 6219 }, { "epoch": 7.631901840490798, "grad_norm": 0.2089073210954666, "learning_rate": 3.594453975606954e-05, "loss": 0.8295494318008423, "step": 6220 }, { "epoch": 7.633128834355828, "grad_norm": 0.2912794351577759, "learning_rate": 3.594002690232682e-05, "loss": 0.6681321859359741, "step": 6221 }, { "epoch": 7.634355828220859, "grad_norm": 0.2893364727497101, "learning_rate": 3.593551360762282e-05, "loss": 0.6083507537841797, "step": 6222 }, { "epoch": 7.6355828220858895, "grad_norm": 0.22438155114650726, "learning_rate": 3.593099987213946e-05, "loss": 0.7350419759750366, "step": 6223 }, { "epoch": 7.636809815950921, "grad_norm": 0.2258642613887787, "learning_rate": 3.5926485696058663e-05, "loss": 0.8439100980758667, "step": 6224 }, { "epoch": 7.638036809815951, "grad_norm": 0.2620466947555542, "learning_rate": 3.59219710795624e-05, "loss": 0.7964906692504883, "step": 6225 }, { "epoch": 7.639263803680982, "grad_norm": 0.27368366718292236, "learning_rate": 3.591745602283263e-05, "loss": 0.8615629076957703, "step": 6226 }, { "epoch": 7.640490797546012, "grad_norm": 0.3348425030708313, "learning_rate": 3.591294052605134e-05, "loss": 0.6888315677642822, "step": 6227 }, { "epoch": 7.641717791411043, "grad_norm": 0.30975136160850525, "learning_rate": 3.590842458940055e-05, "loss": 0.6513122320175171, "step": 6228 }, { "epoch": 7.6429447852760735, "grad_norm": 0.2575242817401886, "learning_rate": 3.590390821306226e-05, "loss": 0.8203145265579224, "step": 6229 }, { "epoch": 7.644171779141105, "grad_norm": 0.24767419695854187, "learning_rate": 3.589939139721855e-05, "loss": 0.694716215133667, "step": 6230 }, { "epoch": 7.645398773006135, "grad_norm": 0.31650015711784363, "learning_rate": 3.589487414205145e-05, "loss": 0.5731862783432007, "step": 6231 }, { "epoch": 7.646625766871166, "grad_norm": 0.21859532594680786, "learning_rate": 3.589035644774304e-05, "loss": 0.8743429183959961, "step": 6232 }, { "epoch": 7.647852760736196, "grad_norm": 0.2656239867210388, "learning_rate": 3.5885838314475434e-05, "loss": 0.7081049680709839, "step": 6233 }, { "epoch": 7.649079754601227, "grad_norm": 0.322681188583374, "learning_rate": 3.588131974243073e-05, "loss": 0.6534411907196045, "step": 6234 }, { "epoch": 7.6503067484662575, "grad_norm": 0.2820695638656616, "learning_rate": 3.587680073179106e-05, "loss": 0.694216251373291, "step": 6235 }, { "epoch": 7.651533742331289, "grad_norm": 0.7724878787994385, "learning_rate": 3.5872281282738575e-05, "loss": 0.7713532447814941, "step": 6236 }, { "epoch": 7.652760736196319, "grad_norm": 0.2547295093536377, "learning_rate": 3.586776139545543e-05, "loss": 0.6997547149658203, "step": 6237 }, { "epoch": 7.65398773006135, "grad_norm": 0.32498955726623535, "learning_rate": 3.586324107012383e-05, "loss": 0.610040545463562, "step": 6238 }, { "epoch": 7.65521472392638, "grad_norm": 0.21734017133712769, "learning_rate": 3.585872030692596e-05, "loss": 0.7365521192550659, "step": 6239 }, { "epoch": 7.656441717791411, "grad_norm": 0.2139178216457367, "learning_rate": 3.585419910604405e-05, "loss": 0.7645847797393799, "step": 6240 }, { "epoch": 7.6576687116564415, "grad_norm": 0.2710302770137787, "learning_rate": 3.584967746766033e-05, "loss": 0.6656256914138794, "step": 6241 }, { "epoch": 7.658895705521473, "grad_norm": 0.2671816945075989, "learning_rate": 3.5845155391957046e-05, "loss": 0.5622760057449341, "step": 6242 }, { "epoch": 7.660122699386503, "grad_norm": 0.23645633459091187, "learning_rate": 3.584063287911649e-05, "loss": 0.6278363466262817, "step": 6243 }, { "epoch": 7.661349693251534, "grad_norm": 0.302773118019104, "learning_rate": 3.5836109929320936e-05, "loss": 0.7436484098434448, "step": 6244 }, { "epoch": 7.662576687116564, "grad_norm": 0.19718340039253235, "learning_rate": 3.5831586542752705e-05, "loss": 0.7673658132553101, "step": 6245 }, { "epoch": 7.663803680981595, "grad_norm": 0.2344202995300293, "learning_rate": 3.582706271959411e-05, "loss": 0.6993998885154724, "step": 6246 }, { "epoch": 7.6650306748466255, "grad_norm": 0.2869882881641388, "learning_rate": 3.5822538460027485e-05, "loss": 0.7518829107284546, "step": 6247 }, { "epoch": 7.666257668711657, "grad_norm": 0.24634712934494019, "learning_rate": 3.5818013764235216e-05, "loss": 0.8034216165542603, "step": 6248 }, { "epoch": 7.667484662576687, "grad_norm": 0.22168126702308655, "learning_rate": 3.5813488632399656e-05, "loss": 0.7873356342315674, "step": 6249 }, { "epoch": 7.668711656441718, "grad_norm": 0.27781498432159424, "learning_rate": 3.5808963064703215e-05, "loss": 0.5512465834617615, "step": 6250 }, { "epoch": 7.669938650306748, "grad_norm": 0.24439065158367157, "learning_rate": 3.58044370613283e-05, "loss": 0.5553398132324219, "step": 6251 }, { "epoch": 7.671165644171779, "grad_norm": 0.34509456157684326, "learning_rate": 3.5799910622457343e-05, "loss": 0.6394045948982239, "step": 6252 }, { "epoch": 7.6723926380368095, "grad_norm": 0.3247109055519104, "learning_rate": 3.5795383748272795e-05, "loss": 0.6241639852523804, "step": 6253 }, { "epoch": 7.673619631901841, "grad_norm": 0.21834519505500793, "learning_rate": 3.579085643895711e-05, "loss": 0.9618014097213745, "step": 6254 }, { "epoch": 7.674846625766871, "grad_norm": 0.23950743675231934, "learning_rate": 3.578632869469278e-05, "loss": 0.8278986215591431, "step": 6255 }, { "epoch": 7.676073619631902, "grad_norm": 0.292389839887619, "learning_rate": 3.5781800515662305e-05, "loss": 0.6984758377075195, "step": 6256 }, { "epoch": 7.677300613496932, "grad_norm": 0.2450593262910843, "learning_rate": 3.577727190204821e-05, "loss": 0.7691605091094971, "step": 6257 }, { "epoch": 7.678527607361963, "grad_norm": 0.32750827074050903, "learning_rate": 3.5772742854033016e-05, "loss": 0.7009886503219604, "step": 6258 }, { "epoch": 7.6797546012269935, "grad_norm": 0.2371959686279297, "learning_rate": 3.5768213371799274e-05, "loss": 0.7963063716888428, "step": 6259 }, { "epoch": 7.680981595092025, "grad_norm": 0.2708028256893158, "learning_rate": 3.576368345552957e-05, "loss": 0.7306592464447021, "step": 6260 }, { "epoch": 7.682208588957055, "grad_norm": 0.30208203196525574, "learning_rate": 3.575915310540649e-05, "loss": 0.7089589834213257, "step": 6261 }, { "epoch": 7.683435582822086, "grad_norm": 0.3653683066368103, "learning_rate": 3.575462232161262e-05, "loss": 0.6142320036888123, "step": 6262 }, { "epoch": 7.684662576687117, "grad_norm": 0.29423874616622925, "learning_rate": 3.575009110433061e-05, "loss": 0.6970834732055664, "step": 6263 }, { "epoch": 7.685889570552147, "grad_norm": 0.3110799193382263, "learning_rate": 3.574555945374309e-05, "loss": 0.6780170202255249, "step": 6264 }, { "epoch": 7.6871165644171775, "grad_norm": 0.25647932291030884, "learning_rate": 3.574102737003271e-05, "loss": 0.6903520822525024, "step": 6265 }, { "epoch": 7.688343558282209, "grad_norm": 0.25262993574142456, "learning_rate": 3.5736494853382146e-05, "loss": 0.7752684354782104, "step": 6266 }, { "epoch": 7.68957055214724, "grad_norm": 0.23460766673088074, "learning_rate": 3.57319619039741e-05, "loss": 0.8362565636634827, "step": 6267 }, { "epoch": 7.69079754601227, "grad_norm": 0.3922935426235199, "learning_rate": 3.572742852199128e-05, "loss": 0.6224590539932251, "step": 6268 }, { "epoch": 7.6920245398773, "grad_norm": 0.3351585865020752, "learning_rate": 3.572289470761642e-05, "loss": 0.4686376452445984, "step": 6269 }, { "epoch": 7.693251533742331, "grad_norm": 0.27228811383247375, "learning_rate": 3.571836046103224e-05, "loss": 0.7731865644454956, "step": 6270 }, { "epoch": 7.694478527607362, "grad_norm": 0.2519312798976898, "learning_rate": 3.571382578242153e-05, "loss": 0.817628026008606, "step": 6271 }, { "epoch": 7.695705521472393, "grad_norm": 0.2541417181491852, "learning_rate": 3.570929067196705e-05, "loss": 0.8164085149765015, "step": 6272 }, { "epoch": 7.696932515337423, "grad_norm": 0.2709287405014038, "learning_rate": 3.570475512985162e-05, "loss": 0.7356339693069458, "step": 6273 }, { "epoch": 7.698159509202454, "grad_norm": 0.2675316035747528, "learning_rate": 3.5700219156258035e-05, "loss": 0.6042225360870361, "step": 6274 }, { "epoch": 7.699386503067485, "grad_norm": 0.3018776476383209, "learning_rate": 3.5695682751369136e-05, "loss": 0.6574692726135254, "step": 6275 }, { "epoch": 7.700613496932515, "grad_norm": 0.22991326451301575, "learning_rate": 3.569114591536777e-05, "loss": 0.8869802355766296, "step": 6276 }, { "epoch": 7.7018404907975455, "grad_norm": 0.2969525456428528, "learning_rate": 3.56866086484368e-05, "loss": 0.6360031962394714, "step": 6277 }, { "epoch": 7.703067484662577, "grad_norm": 0.3294198215007782, "learning_rate": 3.568207095075912e-05, "loss": 0.6375769376754761, "step": 6278 }, { "epoch": 7.704294478527608, "grad_norm": 0.2874973714351654, "learning_rate": 3.5677532822517624e-05, "loss": 0.623879075050354, "step": 6279 }, { "epoch": 7.705521472392638, "grad_norm": 0.23812483251094818, "learning_rate": 3.5672994263895235e-05, "loss": 0.8035266399383545, "step": 6280 }, { "epoch": 7.706748466257669, "grad_norm": 0.4427526295185089, "learning_rate": 3.566845527507489e-05, "loss": 0.47041812539100647, "step": 6281 }, { "epoch": 7.707975460122699, "grad_norm": 0.31160640716552734, "learning_rate": 3.5663915856239535e-05, "loss": 0.49724090099334717, "step": 6282 }, { "epoch": 7.70920245398773, "grad_norm": 0.3277624249458313, "learning_rate": 3.565937600757214e-05, "loss": 0.5905218720436096, "step": 6283 }, { "epoch": 7.710429447852761, "grad_norm": 0.26817840337753296, "learning_rate": 3.565483572925571e-05, "loss": 0.6567869782447815, "step": 6284 }, { "epoch": 7.711656441717792, "grad_norm": 0.2818578779697418, "learning_rate": 3.565029502147323e-05, "loss": 0.8142696022987366, "step": 6285 }, { "epoch": 7.712883435582822, "grad_norm": 0.3327732980251312, "learning_rate": 3.564575388440774e-05, "loss": 0.8166195154190063, "step": 6286 }, { "epoch": 7.714110429447853, "grad_norm": 0.2489572912454605, "learning_rate": 3.564121231824228e-05, "loss": 0.781832218170166, "step": 6287 }, { "epoch": 7.715337423312883, "grad_norm": 0.22168587148189545, "learning_rate": 3.563667032315989e-05, "loss": 0.7487719655036926, "step": 6288 }, { "epoch": 7.716564417177914, "grad_norm": 0.2596166133880615, "learning_rate": 3.5632127899343664e-05, "loss": 0.5914663672447205, "step": 6289 }, { "epoch": 7.717791411042945, "grad_norm": 0.3220753073692322, "learning_rate": 3.562758504697669e-05, "loss": 0.5898213982582092, "step": 6290 }, { "epoch": 7.719018404907976, "grad_norm": 0.31845203042030334, "learning_rate": 3.562304176624206e-05, "loss": 0.6302706003189087, "step": 6291 }, { "epoch": 7.720245398773006, "grad_norm": 0.3222770690917969, "learning_rate": 3.561849805732293e-05, "loss": 0.5411694049835205, "step": 6292 }, { "epoch": 7.721472392638037, "grad_norm": 0.28255075216293335, "learning_rate": 3.561395392040241e-05, "loss": 0.6313619613647461, "step": 6293 }, { "epoch": 7.722699386503067, "grad_norm": 0.3032207190990448, "learning_rate": 3.560940935566369e-05, "loss": 0.617804765701294, "step": 6294 }, { "epoch": 7.723926380368098, "grad_norm": 0.2738857865333557, "learning_rate": 3.560486436328993e-05, "loss": 0.8350663185119629, "step": 6295 }, { "epoch": 7.725153374233129, "grad_norm": 0.256756454706192, "learning_rate": 3.560031894346434e-05, "loss": 0.8187645673751831, "step": 6296 }, { "epoch": 7.72638036809816, "grad_norm": 0.3252195715904236, "learning_rate": 3.559577309637013e-05, "loss": 0.5843849778175354, "step": 6297 }, { "epoch": 7.72760736196319, "grad_norm": 0.2288447469472885, "learning_rate": 3.5591226822190525e-05, "loss": 0.6676013469696045, "step": 6298 }, { "epoch": 7.728834355828221, "grad_norm": 0.28681448101997375, "learning_rate": 3.558668012110877e-05, "loss": 0.6774126887321472, "step": 6299 }, { "epoch": 7.730061349693251, "grad_norm": 0.26210641860961914, "learning_rate": 3.558213299330814e-05, "loss": 0.7463087439537048, "step": 6300 }, { "epoch": 7.731288343558282, "grad_norm": 0.2241361290216446, "learning_rate": 3.5577585438971906e-05, "loss": 0.8653230667114258, "step": 6301 }, { "epoch": 7.732515337423313, "grad_norm": 0.25828802585601807, "learning_rate": 3.557303745828336e-05, "loss": 0.8344051837921143, "step": 6302 }, { "epoch": 7.733742331288344, "grad_norm": 0.3802873194217682, "learning_rate": 3.556848905142585e-05, "loss": 0.49512308835983276, "step": 6303 }, { "epoch": 7.734969325153374, "grad_norm": 0.2751271426677704, "learning_rate": 3.556394021858268e-05, "loss": 0.7677721977233887, "step": 6304 }, { "epoch": 7.736196319018405, "grad_norm": 0.3289409577846527, "learning_rate": 3.5559390959937204e-05, "loss": 0.665313184261322, "step": 6305 }, { "epoch": 7.737423312883435, "grad_norm": 0.2699744999408722, "learning_rate": 3.5554841275672794e-05, "loss": 0.8693842887878418, "step": 6306 }, { "epoch": 7.738650306748466, "grad_norm": 0.30487021803855896, "learning_rate": 3.5550291165972846e-05, "loss": 0.7258412837982178, "step": 6307 }, { "epoch": 7.739877300613497, "grad_norm": 0.35656699538230896, "learning_rate": 3.5545740631020745e-05, "loss": 0.48992854356765747, "step": 6308 }, { "epoch": 7.741104294478528, "grad_norm": 0.3417209982872009, "learning_rate": 3.554118967099991e-05, "loss": 0.5276070237159729, "step": 6309 }, { "epoch": 7.742331288343558, "grad_norm": 0.3867564797401428, "learning_rate": 3.55366382860938e-05, "loss": 0.5275726914405823, "step": 6310 }, { "epoch": 7.743558282208589, "grad_norm": 0.24303078651428223, "learning_rate": 3.553208647648584e-05, "loss": 0.719061553478241, "step": 6311 }, { "epoch": 7.744785276073619, "grad_norm": 0.2842094302177429, "learning_rate": 3.552753424235952e-05, "loss": 0.6195003986358643, "step": 6312 }, { "epoch": 7.74601226993865, "grad_norm": 0.278209388256073, "learning_rate": 3.552298158389831e-05, "loss": 0.6697266101837158, "step": 6313 }, { "epoch": 7.747239263803681, "grad_norm": 0.3011189103126526, "learning_rate": 3.551842850128573e-05, "loss": 0.6959725618362427, "step": 6314 }, { "epoch": 7.748466257668712, "grad_norm": 0.24535013735294342, "learning_rate": 3.55138749947053e-05, "loss": 0.7743730545043945, "step": 6315 }, { "epoch": 7.749693251533742, "grad_norm": 0.2593958377838135, "learning_rate": 3.550932106434055e-05, "loss": 0.7155618667602539, "step": 6316 }, { "epoch": 7.750920245398773, "grad_norm": 0.266903817653656, "learning_rate": 3.550476671037505e-05, "loss": 0.6626027226448059, "step": 6317 }, { "epoch": 7.752147239263803, "grad_norm": 0.23281769454479218, "learning_rate": 3.550021193299236e-05, "loss": 0.8508052229881287, "step": 6318 }, { "epoch": 7.7533742331288344, "grad_norm": 0.28011462092399597, "learning_rate": 3.5495656732376066e-05, "loss": 0.5546311140060425, "step": 6319 }, { "epoch": 7.754601226993865, "grad_norm": 0.33054113388061523, "learning_rate": 3.549110110870979e-05, "loss": 0.6114985346794128, "step": 6320 }, { "epoch": 7.755828220858896, "grad_norm": 0.283687949180603, "learning_rate": 3.5486545062177154e-05, "loss": 0.5844095945358276, "step": 6321 }, { "epoch": 7.757055214723926, "grad_norm": 0.2825002372264862, "learning_rate": 3.5481988592961793e-05, "loss": 0.6006801724433899, "step": 6322 }, { "epoch": 7.758282208588957, "grad_norm": 0.2776808440685272, "learning_rate": 3.547743170124737e-05, "loss": 0.6776776909828186, "step": 6323 }, { "epoch": 7.759509202453987, "grad_norm": 0.24107792973518372, "learning_rate": 3.547287438721755e-05, "loss": 0.5917731523513794, "step": 6324 }, { "epoch": 7.7607361963190185, "grad_norm": 0.29796597361564636, "learning_rate": 3.5468316651056036e-05, "loss": 0.7269648313522339, "step": 6325 }, { "epoch": 7.76196319018405, "grad_norm": 0.24117526412010193, "learning_rate": 3.5463758492946534e-05, "loss": 0.6359647512435913, "step": 6326 }, { "epoch": 7.76319018404908, "grad_norm": 0.2753639221191406, "learning_rate": 3.545919991307277e-05, "loss": 0.695087194442749, "step": 6327 }, { "epoch": 7.76441717791411, "grad_norm": 0.26519450545310974, "learning_rate": 3.5454640911618485e-05, "loss": 0.7413110733032227, "step": 6328 }, { "epoch": 7.765644171779141, "grad_norm": 0.25271138548851013, "learning_rate": 3.5450081488767456e-05, "loss": 0.5336664915084839, "step": 6329 }, { "epoch": 7.766871165644172, "grad_norm": 0.3601246476173401, "learning_rate": 3.5445521644703436e-05, "loss": 0.5179988145828247, "step": 6330 }, { "epoch": 7.7680981595092025, "grad_norm": 0.30512481927871704, "learning_rate": 3.544096137961023e-05, "loss": 0.8361567854881287, "step": 6331 }, { "epoch": 7.769325153374233, "grad_norm": 0.29300957918167114, "learning_rate": 3.5436400693671654e-05, "loss": 0.7071958780288696, "step": 6332 }, { "epoch": 7.770552147239264, "grad_norm": 0.25928834080696106, "learning_rate": 3.543183958707153e-05, "loss": 0.8110322952270508, "step": 6333 }, { "epoch": 7.771779141104295, "grad_norm": 0.21924035251140594, "learning_rate": 3.542727805999371e-05, "loss": 0.7111966609954834, "step": 6334 }, { "epoch": 7.773006134969325, "grad_norm": 0.2302333116531372, "learning_rate": 3.542271611262204e-05, "loss": 0.7420889139175415, "step": 6335 }, { "epoch": 7.774233128834355, "grad_norm": 0.32313379645347595, "learning_rate": 3.541815374514042e-05, "loss": 0.6994321942329407, "step": 6336 }, { "epoch": 7.7754601226993865, "grad_norm": 0.3021433353424072, "learning_rate": 3.541359095773273e-05, "loss": 0.667011022567749, "step": 6337 }, { "epoch": 7.776687116564418, "grad_norm": 0.2345401495695114, "learning_rate": 3.540902775058288e-05, "loss": 0.747456967830658, "step": 6338 }, { "epoch": 7.777914110429448, "grad_norm": 0.2690333425998688, "learning_rate": 3.5404464123874826e-05, "loss": 0.6751466393470764, "step": 6339 }, { "epoch": 7.779141104294479, "grad_norm": 0.21674363315105438, "learning_rate": 3.539990007779249e-05, "loss": 0.757398247718811, "step": 6340 }, { "epoch": 7.780368098159509, "grad_norm": 0.28461581468582153, "learning_rate": 3.539533561251984e-05, "loss": 0.6418748497962952, "step": 6341 }, { "epoch": 7.78159509202454, "grad_norm": 0.29589661955833435, "learning_rate": 3.5390770728240864e-05, "loss": 0.6856507062911987, "step": 6342 }, { "epoch": 7.7828220858895705, "grad_norm": 0.3538205921649933, "learning_rate": 3.538620542513955e-05, "loss": 0.6638555526733398, "step": 6343 }, { "epoch": 7.784049079754602, "grad_norm": 0.23964239656925201, "learning_rate": 3.5381639703399924e-05, "loss": 0.7934955358505249, "step": 6344 }, { "epoch": 7.785276073619632, "grad_norm": 0.2435912787914276, "learning_rate": 3.537707356320601e-05, "loss": 0.7034561634063721, "step": 6345 }, { "epoch": 7.786503067484663, "grad_norm": 0.29393208026885986, "learning_rate": 3.537250700474186e-05, "loss": 0.8181705474853516, "step": 6346 }, { "epoch": 7.787730061349693, "grad_norm": 0.24244424700737, "learning_rate": 3.536794002819154e-05, "loss": 0.870812714099884, "step": 6347 }, { "epoch": 7.788957055214724, "grad_norm": 0.33991706371307373, "learning_rate": 3.536337263373911e-05, "loss": 0.6707617044448853, "step": 6348 }, { "epoch": 7.7901840490797545, "grad_norm": 0.30415797233581543, "learning_rate": 3.53588048215687e-05, "loss": 0.7351387739181519, "step": 6349 }, { "epoch": 7.791411042944786, "grad_norm": 0.2727274000644684, "learning_rate": 3.535423659186441e-05, "loss": 0.7350095510482788, "step": 6350 }, { "epoch": 7.792638036809816, "grad_norm": 0.2167942076921463, "learning_rate": 3.534966794481037e-05, "loss": 0.7483677864074707, "step": 6351 }, { "epoch": 7.793865030674847, "grad_norm": 0.3132603168487549, "learning_rate": 3.5345098880590735e-05, "loss": 0.6543707847595215, "step": 6352 }, { "epoch": 7.795092024539877, "grad_norm": 0.2661445736885071, "learning_rate": 3.5340529399389666e-05, "loss": 0.7246429920196533, "step": 6353 }, { "epoch": 7.796319018404908, "grad_norm": 0.26636210083961487, "learning_rate": 3.533595950139136e-05, "loss": 0.6838340163230896, "step": 6354 }, { "epoch": 7.7975460122699385, "grad_norm": 0.2597968280315399, "learning_rate": 3.5331389186779996e-05, "loss": 0.7571799755096436, "step": 6355 }, { "epoch": 7.79877300613497, "grad_norm": 0.25045493245124817, "learning_rate": 3.532681845573981e-05, "loss": 0.7062060832977295, "step": 6356 }, { "epoch": 7.8, "grad_norm": 0.28667500615119934, "learning_rate": 3.532224730845502e-05, "loss": 0.6844527125358582, "step": 6357 }, { "epoch": 7.801226993865031, "grad_norm": 0.32884085178375244, "learning_rate": 3.5317675745109866e-05, "loss": 0.7087054252624512, "step": 6358 }, { "epoch": 7.802453987730061, "grad_norm": 0.3701448142528534, "learning_rate": 3.531310376588865e-05, "loss": 0.6199939846992493, "step": 6359 }, { "epoch": 7.803680981595092, "grad_norm": 0.22207815945148468, "learning_rate": 3.5308531370975624e-05, "loss": 0.7913895845413208, "step": 6360 }, { "epoch": 7.8049079754601225, "grad_norm": 0.24317918717861176, "learning_rate": 3.5303958560555106e-05, "loss": 0.8559998869895935, "step": 6361 }, { "epoch": 7.806134969325154, "grad_norm": 0.25286242365837097, "learning_rate": 3.52993853348114e-05, "loss": 0.663849949836731, "step": 6362 }, { "epoch": 7.807361963190184, "grad_norm": 0.3072768747806549, "learning_rate": 3.529481169392885e-05, "loss": 0.6734678745269775, "step": 6363 }, { "epoch": 7.808588957055215, "grad_norm": 0.26463401317596436, "learning_rate": 3.52902376380918e-05, "loss": 0.5625600218772888, "step": 6364 }, { "epoch": 7.809815950920245, "grad_norm": 0.2869165241718292, "learning_rate": 3.5285663167484616e-05, "loss": 0.6242519021034241, "step": 6365 }, { "epoch": 7.811042944785276, "grad_norm": 0.22985540330410004, "learning_rate": 3.52810882822917e-05, "loss": 0.9105170965194702, "step": 6366 }, { "epoch": 7.8122699386503065, "grad_norm": 0.22468838095664978, "learning_rate": 3.527651298269742e-05, "loss": 0.715308666229248, "step": 6367 }, { "epoch": 7.813496932515338, "grad_norm": 0.25548115372657776, "learning_rate": 3.527193726888622e-05, "loss": 0.742353618144989, "step": 6368 }, { "epoch": 7.814723926380368, "grad_norm": 0.3422873020172119, "learning_rate": 3.526736114104253e-05, "loss": 0.6141523122787476, "step": 6369 }, { "epoch": 7.815950920245399, "grad_norm": 0.2508634626865387, "learning_rate": 3.526278459935079e-05, "loss": 0.8052802085876465, "step": 6370 }, { "epoch": 7.817177914110429, "grad_norm": 0.270503431558609, "learning_rate": 3.525820764399547e-05, "loss": 0.5907173752784729, "step": 6371 }, { "epoch": 7.81840490797546, "grad_norm": 0.24352358281612396, "learning_rate": 3.525363027516106e-05, "loss": 0.6621992588043213, "step": 6372 }, { "epoch": 7.8196319018404905, "grad_norm": 0.324603796005249, "learning_rate": 3.5249052493032064e-05, "loss": 0.6570391654968262, "step": 6373 }, { "epoch": 7.820858895705522, "grad_norm": 0.32904621958732605, "learning_rate": 3.5244474297792986e-05, "loss": 0.7233416438102722, "step": 6374 }, { "epoch": 7.822085889570552, "grad_norm": 0.3036096692085266, "learning_rate": 3.523989568962837e-05, "loss": 0.7938022613525391, "step": 6375 }, { "epoch": 7.823312883435583, "grad_norm": 0.30524012446403503, "learning_rate": 3.523531666872276e-05, "loss": 0.48800498247146606, "step": 6376 }, { "epoch": 7.824539877300613, "grad_norm": 0.2988888919353485, "learning_rate": 3.523073723526073e-05, "loss": 0.5395156145095825, "step": 6377 }, { "epoch": 7.825766871165644, "grad_norm": 0.2737588584423065, "learning_rate": 3.522615738942686e-05, "loss": 0.777355968952179, "step": 6378 }, { "epoch": 7.8269938650306745, "grad_norm": 0.23220330476760864, "learning_rate": 3.5221577131405756e-05, "loss": 0.6258413791656494, "step": 6379 }, { "epoch": 7.828220858895706, "grad_norm": 0.2288714200258255, "learning_rate": 3.521699646138203e-05, "loss": 0.7875512838363647, "step": 6380 }, { "epoch": 7.829447852760736, "grad_norm": 0.23630933463573456, "learning_rate": 3.521241537954031e-05, "loss": 0.7902424931526184, "step": 6381 }, { "epoch": 7.830674846625767, "grad_norm": 0.264705091714859, "learning_rate": 3.520783388606526e-05, "loss": 0.7123110294342041, "step": 6382 }, { "epoch": 7.831901840490797, "grad_norm": 0.2698476016521454, "learning_rate": 3.520325198114153e-05, "loss": 0.5846637487411499, "step": 6383 }, { "epoch": 7.833128834355828, "grad_norm": 0.31872043013572693, "learning_rate": 3.519866966495382e-05, "loss": 0.4383864104747772, "step": 6384 }, { "epoch": 7.8343558282208585, "grad_norm": 0.23249061405658722, "learning_rate": 3.519408693768682e-05, "loss": 0.7232523560523987, "step": 6385 }, { "epoch": 7.83558282208589, "grad_norm": 0.4265371561050415, "learning_rate": 3.518950379952526e-05, "loss": 0.42467623949050903, "step": 6386 }, { "epoch": 7.83680981595092, "grad_norm": 0.3259751498699188, "learning_rate": 3.518492025065385e-05, "loss": 0.6744294166564941, "step": 6387 }, { "epoch": 7.838036809815951, "grad_norm": 0.2832203805446625, "learning_rate": 3.5180336291257366e-05, "loss": 0.6289308071136475, "step": 6388 }, { "epoch": 7.839263803680982, "grad_norm": 0.26687878370285034, "learning_rate": 3.517575192152055e-05, "loss": 0.8231030702590942, "step": 6389 }, { "epoch": 7.840490797546012, "grad_norm": 0.3132323920726776, "learning_rate": 3.51711671416282e-05, "loss": 0.8777278661727905, "step": 6390 }, { "epoch": 7.8417177914110425, "grad_norm": 0.7409022450447083, "learning_rate": 3.516658195176511e-05, "loss": 0.6414737701416016, "step": 6391 }, { "epoch": 7.842944785276074, "grad_norm": 0.35530325770378113, "learning_rate": 3.51619963521161e-05, "loss": 0.680387556552887, "step": 6392 }, { "epoch": 7.844171779141105, "grad_norm": 0.25872695446014404, "learning_rate": 3.5157410342866005e-05, "loss": 0.701423704624176, "step": 6393 }, { "epoch": 7.845398773006135, "grad_norm": 0.2968560457229614, "learning_rate": 3.515282392419966e-05, "loss": 0.712870717048645, "step": 6394 }, { "epoch": 7.846625766871165, "grad_norm": 0.2847554385662079, "learning_rate": 3.514823709630194e-05, "loss": 0.7496162056922913, "step": 6395 }, { "epoch": 7.847852760736196, "grad_norm": 0.31480348110198975, "learning_rate": 3.514364985935773e-05, "loss": 0.6806473731994629, "step": 6396 }, { "epoch": 7.849079754601227, "grad_norm": 0.3095274567604065, "learning_rate": 3.513906221355193e-05, "loss": 0.6364094018936157, "step": 6397 }, { "epoch": 7.850306748466258, "grad_norm": 0.24934789538383484, "learning_rate": 3.5134474159069436e-05, "loss": 0.9236124753952026, "step": 6398 }, { "epoch": 7.851533742331288, "grad_norm": 0.245953768491745, "learning_rate": 3.51298856960952e-05, "loss": 0.6308124661445618, "step": 6399 }, { "epoch": 7.852760736196319, "grad_norm": 0.23955896496772766, "learning_rate": 3.512529682481417e-05, "loss": 0.7884780168533325, "step": 6400 }, { "epoch": 7.85398773006135, "grad_norm": 0.34409114718437195, "learning_rate": 3.5120707545411294e-05, "loss": 0.7058672904968262, "step": 6401 }, { "epoch": 7.85521472392638, "grad_norm": 0.2741236388683319, "learning_rate": 3.511611785807157e-05, "loss": 0.5750146508216858, "step": 6402 }, { "epoch": 7.856441717791411, "grad_norm": 0.29089900851249695, "learning_rate": 3.511152776297998e-05, "loss": 0.730210542678833, "step": 6403 }, { "epoch": 7.857668711656442, "grad_norm": 0.3706778883934021, "learning_rate": 3.5106937260321545e-05, "loss": 0.6173092126846313, "step": 6404 }, { "epoch": 7.858895705521473, "grad_norm": 0.23215904831886292, "learning_rate": 3.510234635028129e-05, "loss": 0.5972052812576294, "step": 6405 }, { "epoch": 7.860122699386503, "grad_norm": 0.29858651757240295, "learning_rate": 3.509775503304428e-05, "loss": 0.7495841979980469, "step": 6406 }, { "epoch": 7.861349693251534, "grad_norm": 0.23676376044750214, "learning_rate": 3.5093163308795555e-05, "loss": 0.9054660797119141, "step": 6407 }, { "epoch": 7.862576687116564, "grad_norm": 0.259213387966156, "learning_rate": 3.5088571177720206e-05, "loss": 0.6633874177932739, "step": 6408 }, { "epoch": 7.863803680981595, "grad_norm": 0.29971373081207275, "learning_rate": 3.508397864000333e-05, "loss": 0.7040373086929321, "step": 6409 }, { "epoch": 7.865030674846626, "grad_norm": 0.2436707615852356, "learning_rate": 3.507938569583003e-05, "loss": 0.8951126337051392, "step": 6410 }, { "epoch": 7.866257668711657, "grad_norm": 0.2702217400074005, "learning_rate": 3.507479234538544e-05, "loss": 0.6187397837638855, "step": 6411 }, { "epoch": 7.867484662576687, "grad_norm": 0.32660508155822754, "learning_rate": 3.5070198588854706e-05, "loss": 0.7126858830451965, "step": 6412 }, { "epoch": 7.868711656441718, "grad_norm": 0.307948499917984, "learning_rate": 3.506560442642299e-05, "loss": 0.5180830359458923, "step": 6413 }, { "epoch": 7.869938650306748, "grad_norm": 0.22877462208271027, "learning_rate": 3.506100985827547e-05, "loss": 0.6733094453811646, "step": 6414 }, { "epoch": 7.871165644171779, "grad_norm": 0.26358088850975037, "learning_rate": 3.505641488459733e-05, "loss": 0.6706508994102478, "step": 6415 }, { "epoch": 7.87239263803681, "grad_norm": 0.3082081377506256, "learning_rate": 3.5051819505573794e-05, "loss": 0.6833845376968384, "step": 6416 }, { "epoch": 7.873619631901841, "grad_norm": 0.26219096779823303, "learning_rate": 3.504722372139008e-05, "loss": 0.6176770925521851, "step": 6417 }, { "epoch": 7.874846625766871, "grad_norm": 0.27361106872558594, "learning_rate": 3.504262753223143e-05, "loss": 0.8033627271652222, "step": 6418 }, { "epoch": 7.876073619631902, "grad_norm": 0.2716399133205414, "learning_rate": 3.503803093828311e-05, "loss": 0.6651526093482971, "step": 6419 }, { "epoch": 7.877300613496932, "grad_norm": 0.25622496008872986, "learning_rate": 3.503343393973039e-05, "loss": 0.6903812289237976, "step": 6420 }, { "epoch": 7.8785276073619634, "grad_norm": 0.27543526887893677, "learning_rate": 3.502883653675857e-05, "loss": 0.7917654514312744, "step": 6421 }, { "epoch": 7.879754601226994, "grad_norm": 0.32101503014564514, "learning_rate": 3.502423872955295e-05, "loss": 0.5578662157058716, "step": 6422 }, { "epoch": 7.880981595092025, "grad_norm": 0.24779397249221802, "learning_rate": 3.501964051829885e-05, "loss": 0.8134722113609314, "step": 6423 }, { "epoch": 7.882208588957055, "grad_norm": 0.2430768609046936, "learning_rate": 3.501504190318163e-05, "loss": 0.7088840007781982, "step": 6424 }, { "epoch": 7.883435582822086, "grad_norm": 0.30295446515083313, "learning_rate": 3.501044288438663e-05, "loss": 0.885088324546814, "step": 6425 }, { "epoch": 7.884662576687116, "grad_norm": 0.24558910727500916, "learning_rate": 3.5005843462099225e-05, "loss": 0.6463426351547241, "step": 6426 }, { "epoch": 7.8858895705521475, "grad_norm": 0.3304503560066223, "learning_rate": 3.5001243636504796e-05, "loss": 0.5815126895904541, "step": 6427 }, { "epoch": 7.887116564417178, "grad_norm": 0.23607802391052246, "learning_rate": 3.499664340778878e-05, "loss": 0.7974143624305725, "step": 6428 }, { "epoch": 7.888343558282209, "grad_norm": 0.2649649381637573, "learning_rate": 3.499204277613657e-05, "loss": 0.685636043548584, "step": 6429 }, { "epoch": 7.889570552147239, "grad_norm": 0.21305717527866364, "learning_rate": 3.4987441741733615e-05, "loss": 0.7094959020614624, "step": 6430 }, { "epoch": 7.89079754601227, "grad_norm": 0.34879615902900696, "learning_rate": 3.498284030476537e-05, "loss": 0.6834966540336609, "step": 6431 }, { "epoch": 7.8920245398773, "grad_norm": 0.20447705686092377, "learning_rate": 3.4978238465417304e-05, "loss": 0.8545449376106262, "step": 6432 }, { "epoch": 7.8932515337423315, "grad_norm": 0.21721139550209045, "learning_rate": 3.4973636223874906e-05, "loss": 0.8348745107650757, "step": 6433 }, { "epoch": 7.894478527607362, "grad_norm": 0.30214226245880127, "learning_rate": 3.496903358032368e-05, "loss": 0.7201313376426697, "step": 6434 }, { "epoch": 7.895705521472393, "grad_norm": 0.31810301542282104, "learning_rate": 3.4964430534949135e-05, "loss": 0.5946004390716553, "step": 6435 }, { "epoch": 7.896932515337423, "grad_norm": 0.30808866024017334, "learning_rate": 3.495982708793682e-05, "loss": 0.6733134984970093, "step": 6436 }, { "epoch": 7.898159509202454, "grad_norm": 0.2142728716135025, "learning_rate": 3.495522323947228e-05, "loss": 0.7299965620040894, "step": 6437 }, { "epoch": 7.899386503067484, "grad_norm": 0.2934020757675171, "learning_rate": 3.495061898974108e-05, "loss": 0.6303662657737732, "step": 6438 }, { "epoch": 7.9006134969325155, "grad_norm": 0.325406938791275, "learning_rate": 3.4946014338928815e-05, "loss": 0.7807521820068359, "step": 6439 }, { "epoch": 7.901840490797546, "grad_norm": 0.3040265142917633, "learning_rate": 3.4941409287221075e-05, "loss": 0.6540768146514893, "step": 6440 }, { "epoch": 7.903067484662577, "grad_norm": 0.27521607279777527, "learning_rate": 3.493680383480348e-05, "loss": 0.5558710098266602, "step": 6441 }, { "epoch": 7.904294478527607, "grad_norm": 0.23199652135372162, "learning_rate": 3.493219798186167e-05, "loss": 0.8160004615783691, "step": 6442 }, { "epoch": 7.905521472392638, "grad_norm": 0.24023771286010742, "learning_rate": 3.492759172858128e-05, "loss": 0.7762923240661621, "step": 6443 }, { "epoch": 7.906748466257668, "grad_norm": 0.28182023763656616, "learning_rate": 3.492298507514799e-05, "loss": 0.60749351978302, "step": 6444 }, { "epoch": 7.9079754601226995, "grad_norm": 0.2928975522518158, "learning_rate": 3.491837802174746e-05, "loss": 0.782016396522522, "step": 6445 }, { "epoch": 7.90920245398773, "grad_norm": 0.2537023425102234, "learning_rate": 3.4913770568565407e-05, "loss": 0.7524576187133789, "step": 6446 }, { "epoch": 7.910429447852761, "grad_norm": 0.2764695882797241, "learning_rate": 3.4909162715787534e-05, "loss": 0.7298226952552795, "step": 6447 }, { "epoch": 7.911656441717791, "grad_norm": 0.2455717772245407, "learning_rate": 3.490455446359958e-05, "loss": 0.8229846358299255, "step": 6448 }, { "epoch": 7.912883435582822, "grad_norm": 0.2640504539012909, "learning_rate": 3.4899945812187276e-05, "loss": 0.760779857635498, "step": 6449 }, { "epoch": 7.914110429447852, "grad_norm": 0.2706817090511322, "learning_rate": 3.48953367617364e-05, "loss": 0.6462525129318237, "step": 6450 }, { "epoch": 7.9153374233128835, "grad_norm": 0.3083465099334717, "learning_rate": 3.489072731243272e-05, "loss": 0.5665464997291565, "step": 6451 }, { "epoch": 7.916564417177915, "grad_norm": 0.260684609413147, "learning_rate": 3.4886117464462024e-05, "loss": 0.6318472623825073, "step": 6452 }, { "epoch": 7.917791411042945, "grad_norm": 0.27914464473724365, "learning_rate": 3.488150721801014e-05, "loss": 0.6443079710006714, "step": 6453 }, { "epoch": 7.919018404907975, "grad_norm": 0.25598862767219543, "learning_rate": 3.487689657326287e-05, "loss": 0.6266574263572693, "step": 6454 }, { "epoch": 7.920245398773006, "grad_norm": 0.28930819034576416, "learning_rate": 3.487228553040609e-05, "loss": 0.6438083648681641, "step": 6455 }, { "epoch": 7.921472392638037, "grad_norm": 0.3934570252895355, "learning_rate": 3.486767408962562e-05, "loss": 0.5076234340667725, "step": 6456 }, { "epoch": 7.9226993865030675, "grad_norm": 0.29801636934280396, "learning_rate": 3.486306225110735e-05, "loss": 0.7471606135368347, "step": 6457 }, { "epoch": 7.923926380368098, "grad_norm": 0.3640686571598053, "learning_rate": 3.485845001503718e-05, "loss": 0.49953025579452515, "step": 6458 }, { "epoch": 7.925153374233129, "grad_norm": 0.3600558638572693, "learning_rate": 3.4853837381601004e-05, "loss": 0.39878541231155396, "step": 6459 }, { "epoch": 7.92638036809816, "grad_norm": 0.27834567427635193, "learning_rate": 3.484922435098474e-05, "loss": 0.5950149297714233, "step": 6460 }, { "epoch": 7.92760736196319, "grad_norm": 0.2558915615081787, "learning_rate": 3.4844610923374335e-05, "loss": 0.5380964279174805, "step": 6461 }, { "epoch": 7.92883435582822, "grad_norm": 0.28536391258239746, "learning_rate": 3.483999709895575e-05, "loss": 0.7931221723556519, "step": 6462 }, { "epoch": 7.9300613496932515, "grad_norm": 0.2983061373233795, "learning_rate": 3.483538287791495e-05, "loss": 0.7756168246269226, "step": 6463 }, { "epoch": 7.931288343558283, "grad_norm": 0.2234172224998474, "learning_rate": 3.48307682604379e-05, "loss": 0.8787049055099487, "step": 6464 }, { "epoch": 7.932515337423313, "grad_norm": 0.2903193533420563, "learning_rate": 3.482615324671064e-05, "loss": 0.7862464189529419, "step": 6465 }, { "epoch": 7.933742331288344, "grad_norm": 0.2693476378917694, "learning_rate": 3.482153783691915e-05, "loss": 0.7960899472236633, "step": 6466 }, { "epoch": 7.934969325153374, "grad_norm": 0.29353418946266174, "learning_rate": 3.481692203124949e-05, "loss": 0.816847562789917, "step": 6467 }, { "epoch": 7.936196319018405, "grad_norm": 0.25159355998039246, "learning_rate": 3.48123058298877e-05, "loss": 0.8966851830482483, "step": 6468 }, { "epoch": 7.9374233128834355, "grad_norm": 0.28627049922943115, "learning_rate": 3.480768923301985e-05, "loss": 0.7213513851165771, "step": 6469 }, { "epoch": 7.938650306748467, "grad_norm": 0.2508056163787842, "learning_rate": 3.4803072240832015e-05, "loss": 0.7641868591308594, "step": 6470 }, { "epoch": 7.939877300613497, "grad_norm": 0.29177016019821167, "learning_rate": 3.479845485351029e-05, "loss": 0.4609408974647522, "step": 6471 }, { "epoch": 7.941104294478528, "grad_norm": 0.22861698269844055, "learning_rate": 3.479383707124081e-05, "loss": 0.6127218008041382, "step": 6472 }, { "epoch": 7.942331288343558, "grad_norm": 0.34365326166152954, "learning_rate": 3.4789218894209685e-05, "loss": 0.5710746049880981, "step": 6473 }, { "epoch": 7.943558282208589, "grad_norm": 0.2526445984840393, "learning_rate": 3.4784600322603075e-05, "loss": 0.715705394744873, "step": 6474 }, { "epoch": 7.9447852760736195, "grad_norm": 0.2765926420688629, "learning_rate": 3.4779981356607116e-05, "loss": 0.8336486220359802, "step": 6475 }, { "epoch": 7.946012269938651, "grad_norm": 0.2400616705417633, "learning_rate": 3.477536199640801e-05, "loss": 0.7903436422348022, "step": 6476 }, { "epoch": 7.947239263803681, "grad_norm": 0.3043610155582428, "learning_rate": 3.4770742242191945e-05, "loss": 0.6667022705078125, "step": 6477 }, { "epoch": 7.948466257668712, "grad_norm": 0.27524152398109436, "learning_rate": 3.476612209414512e-05, "loss": 0.8457584381103516, "step": 6478 }, { "epoch": 7.949693251533742, "grad_norm": 0.2607647180557251, "learning_rate": 3.476150155245378e-05, "loss": 0.7337687015533447, "step": 6479 }, { "epoch": 7.950920245398773, "grad_norm": 0.24450601637363434, "learning_rate": 3.4756880617304145e-05, "loss": 0.8099370002746582, "step": 6480 }, { "epoch": 7.9521472392638035, "grad_norm": 0.25504425168037415, "learning_rate": 3.4752259288882474e-05, "loss": 0.6860841512680054, "step": 6481 }, { "epoch": 7.953374233128835, "grad_norm": 0.3081159293651581, "learning_rate": 3.474763756737506e-05, "loss": 0.615961492061615, "step": 6482 }, { "epoch": 7.954601226993865, "grad_norm": 0.3026217222213745, "learning_rate": 3.474301545296816e-05, "loss": 0.6590773463249207, "step": 6483 }, { "epoch": 7.955828220858896, "grad_norm": 0.23438891768455505, "learning_rate": 3.4738392945848106e-05, "loss": 0.7509942054748535, "step": 6484 }, { "epoch": 7.957055214723926, "grad_norm": 0.23697945475578308, "learning_rate": 3.47337700462012e-05, "loss": 0.8883111476898193, "step": 6485 }, { "epoch": 7.958282208588957, "grad_norm": 0.27560365200042725, "learning_rate": 3.4729146754213795e-05, "loss": 0.49951881170272827, "step": 6486 }, { "epoch": 7.9595092024539875, "grad_norm": 0.23908619582653046, "learning_rate": 3.472452307007222e-05, "loss": 0.906695544719696, "step": 6487 }, { "epoch": 7.960736196319019, "grad_norm": 0.2509385347366333, "learning_rate": 3.471989899396286e-05, "loss": 0.8410787582397461, "step": 6488 }, { "epoch": 7.961963190184049, "grad_norm": 0.27943527698516846, "learning_rate": 3.4715274526072104e-05, "loss": 0.5717967748641968, "step": 6489 }, { "epoch": 7.96319018404908, "grad_norm": 0.20939888060092926, "learning_rate": 3.4710649666586334e-05, "loss": 0.7739972472190857, "step": 6490 }, { "epoch": 7.96441717791411, "grad_norm": 0.3194470703601837, "learning_rate": 3.470602441569197e-05, "loss": 0.4966714084148407, "step": 6491 }, { "epoch": 7.965644171779141, "grad_norm": 0.22642241418361664, "learning_rate": 3.4701398773575445e-05, "loss": 0.8970286846160889, "step": 6492 }, { "epoch": 7.9668711656441715, "grad_norm": 0.3319435119628906, "learning_rate": 3.46967727404232e-05, "loss": 0.46915513277053833, "step": 6493 }, { "epoch": 7.968098159509203, "grad_norm": 0.26808199286460876, "learning_rate": 3.4692146316421706e-05, "loss": 0.704731822013855, "step": 6494 }, { "epoch": 7.969325153374233, "grad_norm": 0.2165178805589676, "learning_rate": 3.468751950175744e-05, "loss": 0.7739938497543335, "step": 6495 }, { "epoch": 7.970552147239264, "grad_norm": 0.2882135510444641, "learning_rate": 3.468289229661689e-05, "loss": 0.5214114785194397, "step": 6496 }, { "epoch": 7.971779141104294, "grad_norm": 0.40683290362358093, "learning_rate": 3.4678264701186563e-05, "loss": 0.6191822290420532, "step": 6497 }, { "epoch": 7.973006134969325, "grad_norm": 0.3085503876209259, "learning_rate": 3.467363671565299e-05, "loss": 0.6362425088882446, "step": 6498 }, { "epoch": 7.9742331288343555, "grad_norm": 0.35361918807029724, "learning_rate": 3.466900834020272e-05, "loss": 0.3856748938560486, "step": 6499 }, { "epoch": 7.975460122699387, "grad_norm": 0.24998952448368073, "learning_rate": 3.466437957502229e-05, "loss": 0.7872902750968933, "step": 6500 }, { "epoch": 7.976687116564417, "grad_norm": 0.27156952023506165, "learning_rate": 3.46597504202983e-05, "loss": 0.7062777280807495, "step": 6501 }, { "epoch": 7.977914110429448, "grad_norm": 0.26051104068756104, "learning_rate": 3.46551208762173e-05, "loss": 0.8083308935165405, "step": 6502 }, { "epoch": 7.979141104294478, "grad_norm": 0.22289445996284485, "learning_rate": 3.465049094296592e-05, "loss": 0.5995118618011475, "step": 6503 }, { "epoch": 7.980368098159509, "grad_norm": 0.34567004442214966, "learning_rate": 3.4645860620730786e-05, "loss": 0.5565685629844666, "step": 6504 }, { "epoch": 7.9815950920245395, "grad_norm": 0.28835049271583557, "learning_rate": 3.464122990969851e-05, "loss": 0.7552725076675415, "step": 6505 }, { "epoch": 7.982822085889571, "grad_norm": 0.2436736524105072, "learning_rate": 3.463659881005576e-05, "loss": 0.6493717432022095, "step": 6506 }, { "epoch": 7.984049079754601, "grad_norm": 0.23040474951267242, "learning_rate": 3.4631967321989186e-05, "loss": 0.8066229820251465, "step": 6507 }, { "epoch": 7.985276073619632, "grad_norm": 0.29692965745925903, "learning_rate": 3.4627335445685496e-05, "loss": 0.6778465509414673, "step": 6508 }, { "epoch": 7.986503067484662, "grad_norm": 0.2783028185367584, "learning_rate": 3.462270318133136e-05, "loss": 0.6162908673286438, "step": 6509 }, { "epoch": 7.987730061349693, "grad_norm": 0.22288717329502106, "learning_rate": 3.4618070529113514e-05, "loss": 0.8419781923294067, "step": 6510 }, { "epoch": 7.9889570552147235, "grad_norm": 0.2464863359928131, "learning_rate": 3.461343748921867e-05, "loss": 0.7231227159500122, "step": 6511 }, { "epoch": 7.990184049079755, "grad_norm": 0.2566157877445221, "learning_rate": 3.460880406183358e-05, "loss": 0.6673516631126404, "step": 6512 }, { "epoch": 7.991411042944785, "grad_norm": 0.29517680406570435, "learning_rate": 3.4604170247145004e-05, "loss": 0.9403156042098999, "step": 6513 }, { "epoch": 7.992638036809816, "grad_norm": 0.2383507639169693, "learning_rate": 3.4599536045339713e-05, "loss": 0.8426942825317383, "step": 6514 }, { "epoch": 7.993865030674847, "grad_norm": 0.24393434822559357, "learning_rate": 3.459490145660451e-05, "loss": 0.7482519149780273, "step": 6515 }, { "epoch": 7.995092024539877, "grad_norm": 0.31351780891418457, "learning_rate": 3.4590266481126186e-05, "loss": 0.6960428953170776, "step": 6516 }, { "epoch": 7.9963190184049076, "grad_norm": 0.26115188002586365, "learning_rate": 3.458563111909158e-05, "loss": 0.7388300895690918, "step": 6517 }, { "epoch": 7.997546012269939, "grad_norm": 0.30643945932388306, "learning_rate": 3.4580995370687516e-05, "loss": 0.6358373165130615, "step": 6518 }, { "epoch": 7.99877300613497, "grad_norm": 0.2941887676715851, "learning_rate": 3.457635923610086e-05, "loss": 0.6924500465393066, "step": 6519 }, { "epoch": 8.0, "grad_norm": 0.3100006878376007, "learning_rate": 3.457172271551848e-05, "loss": 0.6875077486038208, "step": 6520 }, { "epoch": 8.00122699386503, "grad_norm": 0.27356982231140137, "learning_rate": 3.456708580912725e-05, "loss": 0.6078600883483887, "step": 6521 }, { "epoch": 8.002453987730062, "grad_norm": 0.267820805311203, "learning_rate": 3.456244851711408e-05, "loss": 0.6574736833572388, "step": 6522 }, { "epoch": 8.003680981595092, "grad_norm": 0.27556657791137695, "learning_rate": 3.455781083966588e-05, "loss": 0.6894514560699463, "step": 6523 }, { "epoch": 8.004907975460123, "grad_norm": 0.2400527000427246, "learning_rate": 3.4553172776969584e-05, "loss": 0.5532233715057373, "step": 6524 }, { "epoch": 8.006134969325153, "grad_norm": 0.29308801889419556, "learning_rate": 3.4548534329212144e-05, "loss": 0.6641072034835815, "step": 6525 }, { "epoch": 8.007361963190185, "grad_norm": 0.2722140848636627, "learning_rate": 3.454389549658051e-05, "loss": 0.5609369874000549, "step": 6526 }, { "epoch": 8.008588957055215, "grad_norm": 0.2134161740541458, "learning_rate": 3.4539256279261677e-05, "loss": 0.715683102607727, "step": 6527 }, { "epoch": 8.009815950920245, "grad_norm": 0.2509402930736542, "learning_rate": 3.453461667744263e-05, "loss": 0.5021774768829346, "step": 6528 }, { "epoch": 8.011042944785276, "grad_norm": 0.24440458416938782, "learning_rate": 3.4529976691310376e-05, "loss": 0.7776093482971191, "step": 6529 }, { "epoch": 8.012269938650308, "grad_norm": 0.3059106767177582, "learning_rate": 3.4525336321051946e-05, "loss": 0.6575092673301697, "step": 6530 }, { "epoch": 8.013496932515338, "grad_norm": 0.2342449575662613, "learning_rate": 3.452069556685437e-05, "loss": 0.8387891054153442, "step": 6531 }, { "epoch": 8.014723926380368, "grad_norm": 0.2909540832042694, "learning_rate": 3.4516054428904715e-05, "loss": 0.6307786703109741, "step": 6532 }, { "epoch": 8.015950920245398, "grad_norm": 0.3137982189655304, "learning_rate": 3.4511412907390044e-05, "loss": 0.6348243355751038, "step": 6533 }, { "epoch": 8.01717791411043, "grad_norm": 0.3281397521495819, "learning_rate": 3.4506771002497447e-05, "loss": 0.3889496922492981, "step": 6534 }, { "epoch": 8.01840490797546, "grad_norm": 0.2251000702381134, "learning_rate": 3.450212871441402e-05, "loss": 0.6736846566200256, "step": 6535 }, { "epoch": 8.01963190184049, "grad_norm": 0.31394270062446594, "learning_rate": 3.4497486043326895e-05, "loss": 0.7305171489715576, "step": 6536 }, { "epoch": 8.020858895705521, "grad_norm": 0.20272260904312134, "learning_rate": 3.449284298942319e-05, "loss": 0.8186571598052979, "step": 6537 }, { "epoch": 8.022085889570553, "grad_norm": 0.2684935927391052, "learning_rate": 3.448819955289005e-05, "loss": 0.5216366648674011, "step": 6538 }, { "epoch": 8.023312883435583, "grad_norm": 0.31893911957740784, "learning_rate": 3.4483555733914665e-05, "loss": 0.5372329950332642, "step": 6539 }, { "epoch": 8.024539877300613, "grad_norm": 0.2937241196632385, "learning_rate": 3.4478911532684183e-05, "loss": 0.43371719121932983, "step": 6540 }, { "epoch": 8.025766871165644, "grad_norm": 0.34720730781555176, "learning_rate": 3.4474266949385817e-05, "loss": 0.507535994052887, "step": 6541 }, { "epoch": 8.026993865030676, "grad_norm": 0.25088927149772644, "learning_rate": 3.4469621984206765e-05, "loss": 0.6389656066894531, "step": 6542 }, { "epoch": 8.028220858895706, "grad_norm": 0.2270653247833252, "learning_rate": 3.4464976637334265e-05, "loss": 0.6977084875106812, "step": 6543 }, { "epoch": 8.029447852760736, "grad_norm": 0.24516327679157257, "learning_rate": 3.4460330908955557e-05, "loss": 0.7927283048629761, "step": 6544 }, { "epoch": 8.030674846625766, "grad_norm": 0.27388375997543335, "learning_rate": 3.445568479925788e-05, "loss": 0.5395816564559937, "step": 6545 }, { "epoch": 8.031901840490798, "grad_norm": 0.2702625095844269, "learning_rate": 3.4451038308428525e-05, "loss": 0.5426216125488281, "step": 6546 }, { "epoch": 8.033128834355828, "grad_norm": 0.2544507086277008, "learning_rate": 3.444639143665477e-05, "loss": 0.7305755615234375, "step": 6547 }, { "epoch": 8.034355828220859, "grad_norm": 0.3181895911693573, "learning_rate": 3.444174418412392e-05, "loss": 0.5980538725852966, "step": 6548 }, { "epoch": 8.035582822085889, "grad_norm": 0.30269744992256165, "learning_rate": 3.443709655102328e-05, "loss": 0.7736784815788269, "step": 6549 }, { "epoch": 8.036809815950921, "grad_norm": 0.23098456859588623, "learning_rate": 3.4432448537540204e-05, "loss": 0.6303762197494507, "step": 6550 }, { "epoch": 8.038036809815951, "grad_norm": 0.3320716917514801, "learning_rate": 3.442780014386203e-05, "loss": 0.7084223031997681, "step": 6551 }, { "epoch": 8.039263803680981, "grad_norm": 0.3203427195549011, "learning_rate": 3.442315137017612e-05, "loss": 0.48406997323036194, "step": 6552 }, { "epoch": 8.040490797546012, "grad_norm": 0.32803186774253845, "learning_rate": 3.4418502216669843e-05, "loss": 0.5401580333709717, "step": 6553 }, { "epoch": 8.041717791411044, "grad_norm": 0.22217169404029846, "learning_rate": 3.4413852683530626e-05, "loss": 0.7629472017288208, "step": 6554 }, { "epoch": 8.042944785276074, "grad_norm": 0.24001264572143555, "learning_rate": 3.4409202770945836e-05, "loss": 0.6622973680496216, "step": 6555 }, { "epoch": 8.044171779141104, "grad_norm": 0.2759571671485901, "learning_rate": 3.4404552479102924e-05, "loss": 0.7148001790046692, "step": 6556 }, { "epoch": 8.045398773006134, "grad_norm": 0.24738717079162598, "learning_rate": 3.439990180818933e-05, "loss": 0.783972442150116, "step": 6557 }, { "epoch": 8.046625766871166, "grad_norm": 0.3031608760356903, "learning_rate": 3.43952507583925e-05, "loss": 0.43376263976097107, "step": 6558 }, { "epoch": 8.047852760736196, "grad_norm": 0.2577238380908966, "learning_rate": 3.439059932989991e-05, "loss": 0.5904891490936279, "step": 6559 }, { "epoch": 8.049079754601227, "grad_norm": 0.23315668106079102, "learning_rate": 3.438594752289903e-05, "loss": 0.7123715877532959, "step": 6560 }, { "epoch": 8.050306748466257, "grad_norm": 0.2594318389892578, "learning_rate": 3.4381295337577394e-05, "loss": 0.834304690361023, "step": 6561 }, { "epoch": 8.051533742331289, "grad_norm": 0.24216923117637634, "learning_rate": 3.4376642774122486e-05, "loss": 0.8244535326957703, "step": 6562 }, { "epoch": 8.05276073619632, "grad_norm": 0.2752462923526764, "learning_rate": 3.437198983272186e-05, "loss": 0.6450597643852234, "step": 6563 }, { "epoch": 8.05398773006135, "grad_norm": 0.2858986556529999, "learning_rate": 3.436733651356305e-05, "loss": 0.7217491865158081, "step": 6564 }, { "epoch": 8.05521472392638, "grad_norm": 0.22606411576271057, "learning_rate": 3.436268281683362e-05, "loss": 0.7929929494857788, "step": 6565 }, { "epoch": 8.056441717791412, "grad_norm": 0.23301400244235992, "learning_rate": 3.4358028742721153e-05, "loss": 0.6518545150756836, "step": 6566 }, { "epoch": 8.057668711656442, "grad_norm": 0.40565744042396545, "learning_rate": 3.435337429141323e-05, "loss": 0.4003661274909973, "step": 6567 }, { "epoch": 8.058895705521472, "grad_norm": 0.7749685645103455, "learning_rate": 3.4348719463097473e-05, "loss": 0.7202525734901428, "step": 6568 }, { "epoch": 8.060122699386502, "grad_norm": 0.2884778082370758, "learning_rate": 3.4344064257961496e-05, "loss": 0.7206721305847168, "step": 6569 }, { "epoch": 8.061349693251534, "grad_norm": 0.31874486804008484, "learning_rate": 3.433940867619294e-05, "loss": 0.5952557921409607, "step": 6570 }, { "epoch": 8.062576687116565, "grad_norm": 0.24019302427768707, "learning_rate": 3.433475271797946e-05, "loss": 0.752179741859436, "step": 6571 }, { "epoch": 8.063803680981595, "grad_norm": 0.36350879073143005, "learning_rate": 3.433009638350871e-05, "loss": 0.5342015027999878, "step": 6572 }, { "epoch": 8.065030674846625, "grad_norm": 0.264597624540329, "learning_rate": 3.43254396729684e-05, "loss": 0.7164100408554077, "step": 6573 }, { "epoch": 8.066257668711657, "grad_norm": 0.30489668250083923, "learning_rate": 3.4320782586546195e-05, "loss": 0.584200918674469, "step": 6574 }, { "epoch": 8.067484662576687, "grad_norm": 0.27320748567581177, "learning_rate": 3.4316125124429835e-05, "loss": 0.7258477210998535, "step": 6575 }, { "epoch": 8.068711656441717, "grad_norm": 0.2720887064933777, "learning_rate": 3.431146728680705e-05, "loss": 0.585737943649292, "step": 6576 }, { "epoch": 8.069938650306748, "grad_norm": 0.23702457547187805, "learning_rate": 3.430680907386557e-05, "loss": 0.6848354339599609, "step": 6577 }, { "epoch": 8.07116564417178, "grad_norm": 0.24968260526657104, "learning_rate": 3.430215048579316e-05, "loss": 0.6928352117538452, "step": 6578 }, { "epoch": 8.07239263803681, "grad_norm": 0.2909610867500305, "learning_rate": 3.42974915227776e-05, "loss": 0.5211905241012573, "step": 6579 }, { "epoch": 8.07361963190184, "grad_norm": 0.21734477579593658, "learning_rate": 3.4292832185006665e-05, "loss": 0.6792640089988708, "step": 6580 }, { "epoch": 8.07484662576687, "grad_norm": 0.281751424074173, "learning_rate": 3.428817247266818e-05, "loss": 0.4982343912124634, "step": 6581 }, { "epoch": 8.076073619631902, "grad_norm": 0.264277845621109, "learning_rate": 3.428351238594994e-05, "loss": 0.5141274929046631, "step": 6582 }, { "epoch": 8.077300613496933, "grad_norm": 0.30388522148132324, "learning_rate": 3.4278851925039803e-05, "loss": 0.6607720851898193, "step": 6583 }, { "epoch": 8.078527607361963, "grad_norm": 0.2449156790971756, "learning_rate": 3.42741910901256e-05, "loss": 0.7304635047912598, "step": 6584 }, { "epoch": 8.079754601226995, "grad_norm": 0.28682318329811096, "learning_rate": 3.426952988139521e-05, "loss": 0.5263573527336121, "step": 6585 }, { "epoch": 8.080981595092025, "grad_norm": 0.26624566316604614, "learning_rate": 3.4264868299036515e-05, "loss": 0.6886088848114014, "step": 6586 }, { "epoch": 8.082208588957055, "grad_norm": 0.3249112069606781, "learning_rate": 3.426020634323739e-05, "loss": 0.5821573734283447, "step": 6587 }, { "epoch": 8.083435582822085, "grad_norm": 0.21348200738430023, "learning_rate": 3.425554401418577e-05, "loss": 0.7147064208984375, "step": 6588 }, { "epoch": 8.084662576687117, "grad_norm": 0.2978939712047577, "learning_rate": 3.425088131206956e-05, "loss": 0.5427660346031189, "step": 6589 }, { "epoch": 8.085889570552148, "grad_norm": 0.24759453535079956, "learning_rate": 3.4246218237076716e-05, "loss": 0.8127779364585876, "step": 6590 }, { "epoch": 8.087116564417178, "grad_norm": 0.2325248122215271, "learning_rate": 3.424155478939518e-05, "loss": 0.6260792016983032, "step": 6591 }, { "epoch": 8.088343558282208, "grad_norm": 0.27321657538414, "learning_rate": 3.4236890969212934e-05, "loss": 0.6073760986328125, "step": 6592 }, { "epoch": 8.08957055214724, "grad_norm": 0.2590590715408325, "learning_rate": 3.423222677671795e-05, "loss": 0.7219470739364624, "step": 6593 }, { "epoch": 8.09079754601227, "grad_norm": 0.2691342532634735, "learning_rate": 3.4227562212098236e-05, "loss": 0.5761741995811462, "step": 6594 }, { "epoch": 8.0920245398773, "grad_norm": 0.3529961407184601, "learning_rate": 3.422289727554181e-05, "loss": 0.6773962378501892, "step": 6595 }, { "epoch": 8.09325153374233, "grad_norm": 0.27662575244903564, "learning_rate": 3.4218231967236696e-05, "loss": 0.6500088572502136, "step": 6596 }, { "epoch": 8.094478527607363, "grad_norm": 0.3061162829399109, "learning_rate": 3.421356628737095e-05, "loss": 0.6178303360939026, "step": 6597 }, { "epoch": 8.095705521472393, "grad_norm": 0.6061999201774597, "learning_rate": 3.420890023613262e-05, "loss": 0.6946730613708496, "step": 6598 }, { "epoch": 8.096932515337423, "grad_norm": 0.2852933406829834, "learning_rate": 3.420423381370979e-05, "loss": 0.6246740818023682, "step": 6599 }, { "epoch": 8.098159509202453, "grad_norm": 0.2434665858745575, "learning_rate": 3.4199567020290546e-05, "loss": 0.7188349962234497, "step": 6600 }, { "epoch": 8.099386503067485, "grad_norm": 0.33422696590423584, "learning_rate": 3.419489985606299e-05, "loss": 0.506921112537384, "step": 6601 }, { "epoch": 8.100613496932516, "grad_norm": 0.2559536099433899, "learning_rate": 3.419023232121524e-05, "loss": 0.5050310492515564, "step": 6602 }, { "epoch": 8.101840490797546, "grad_norm": 0.21893779933452606, "learning_rate": 3.418556441593544e-05, "loss": 0.7795552015304565, "step": 6603 }, { "epoch": 8.103067484662576, "grad_norm": 0.2174748033285141, "learning_rate": 3.418089614041174e-05, "loss": 0.800972044467926, "step": 6604 }, { "epoch": 8.104294478527608, "grad_norm": 0.2470160573720932, "learning_rate": 3.4176227494832305e-05, "loss": 0.7871167659759521, "step": 6605 }, { "epoch": 8.105521472392638, "grad_norm": 0.235539510846138, "learning_rate": 3.4171558479385305e-05, "loss": 0.6163487434387207, "step": 6606 }, { "epoch": 8.106748466257669, "grad_norm": 0.2845281958580017, "learning_rate": 3.416688909425895e-05, "loss": 0.6980320811271667, "step": 6607 }, { "epoch": 8.107975460122699, "grad_norm": 0.3612321615219116, "learning_rate": 3.416221933964143e-05, "loss": 0.5541358590126038, "step": 6608 }, { "epoch": 8.10920245398773, "grad_norm": 0.3060509264469147, "learning_rate": 3.4157549215720976e-05, "loss": 0.5431631803512573, "step": 6609 }, { "epoch": 8.110429447852761, "grad_norm": 0.23383796215057373, "learning_rate": 3.4152878722685844e-05, "loss": 0.5834778547286987, "step": 6610 }, { "epoch": 8.111656441717791, "grad_norm": 0.4020327627658844, "learning_rate": 3.414820786072427e-05, "loss": 0.6149829626083374, "step": 6611 }, { "epoch": 8.112883435582821, "grad_norm": 0.309135764837265, "learning_rate": 3.414353663002453e-05, "loss": 0.677094042301178, "step": 6612 }, { "epoch": 8.114110429447853, "grad_norm": 0.350872665643692, "learning_rate": 3.41388650307749e-05, "loss": 0.6366096138954163, "step": 6613 }, { "epoch": 8.115337423312884, "grad_norm": 0.2634875476360321, "learning_rate": 3.413419306316369e-05, "loss": 0.7210923433303833, "step": 6614 }, { "epoch": 8.116564417177914, "grad_norm": 0.2518759071826935, "learning_rate": 3.412952072737921e-05, "loss": 0.7298399209976196, "step": 6615 }, { "epoch": 8.117791411042944, "grad_norm": 0.33522656559944153, "learning_rate": 3.4124848023609777e-05, "loss": 0.5669788122177124, "step": 6616 }, { "epoch": 8.119018404907976, "grad_norm": 0.3302757143974304, "learning_rate": 3.412017495204376e-05, "loss": 0.44271644949913025, "step": 6617 }, { "epoch": 8.120245398773006, "grad_norm": 0.2666381001472473, "learning_rate": 3.4115501512869496e-05, "loss": 0.7366783618927002, "step": 6618 }, { "epoch": 8.121472392638037, "grad_norm": 0.3189249634742737, "learning_rate": 3.411082770627537e-05, "loss": 0.5334112644195557, "step": 6619 }, { "epoch": 8.122699386503067, "grad_norm": 0.2707368731498718, "learning_rate": 3.4106153532449756e-05, "loss": 0.9005915522575378, "step": 6620 }, { "epoch": 8.123926380368099, "grad_norm": 0.3039087951183319, "learning_rate": 3.410147899158107e-05, "loss": 0.49560707807540894, "step": 6621 }, { "epoch": 8.125153374233129, "grad_norm": 0.2520478069782257, "learning_rate": 3.4096804083857725e-05, "loss": 0.6481561660766602, "step": 6622 }, { "epoch": 8.12638036809816, "grad_norm": 0.30455511808395386, "learning_rate": 3.409212880946815e-05, "loss": 0.7433929443359375, "step": 6623 }, { "epoch": 8.12760736196319, "grad_norm": 0.2969837188720703, "learning_rate": 3.40874531686008e-05, "loss": 0.6438249349594116, "step": 6624 }, { "epoch": 8.128834355828221, "grad_norm": 0.31477904319763184, "learning_rate": 3.4082777161444116e-05, "loss": 0.6396470069885254, "step": 6625 }, { "epoch": 8.130061349693252, "grad_norm": 0.32729437947273254, "learning_rate": 3.40781007881866e-05, "loss": 0.43886151909828186, "step": 6626 }, { "epoch": 8.131288343558282, "grad_norm": 0.2907702922821045, "learning_rate": 3.4073424049016737e-05, "loss": 0.562971830368042, "step": 6627 }, { "epoch": 8.132515337423312, "grad_norm": 0.2972729206085205, "learning_rate": 3.4068746944123015e-05, "loss": 0.7559869289398193, "step": 6628 }, { "epoch": 8.133742331288344, "grad_norm": 0.2983475923538208, "learning_rate": 3.406406947369398e-05, "loss": 0.6997472643852234, "step": 6629 }, { "epoch": 8.134969325153374, "grad_norm": 0.23758235573768616, "learning_rate": 3.405939163791815e-05, "loss": 0.7329686880111694, "step": 6630 }, { "epoch": 8.136196319018405, "grad_norm": 0.3049295246601105, "learning_rate": 3.405471343698409e-05, "loss": 0.8266766667366028, "step": 6631 }, { "epoch": 8.137423312883435, "grad_norm": 0.22873805463314056, "learning_rate": 3.405003487108035e-05, "loss": 0.7597131133079529, "step": 6632 }, { "epoch": 8.138650306748467, "grad_norm": 0.2770978510379791, "learning_rate": 3.4045355940395515e-05, "loss": 0.5797195434570312, "step": 6633 }, { "epoch": 8.139877300613497, "grad_norm": 0.3104846179485321, "learning_rate": 3.404067664511818e-05, "loss": 0.7728922963142395, "step": 6634 }, { "epoch": 8.141104294478527, "grad_norm": 0.3122737407684326, "learning_rate": 3.403599698543695e-05, "loss": 0.6586949229240417, "step": 6635 }, { "epoch": 8.142331288343557, "grad_norm": 0.2577395737171173, "learning_rate": 3.403131696154046e-05, "loss": 0.8584476113319397, "step": 6636 }, { "epoch": 8.14355828220859, "grad_norm": 0.2785377502441406, "learning_rate": 3.402663657361734e-05, "loss": 0.43578362464904785, "step": 6637 }, { "epoch": 8.14478527607362, "grad_norm": 0.26346489787101746, "learning_rate": 3.4021955821856246e-05, "loss": 0.856557309627533, "step": 6638 }, { "epoch": 8.14601226993865, "grad_norm": 0.22023512423038483, "learning_rate": 3.401727470644584e-05, "loss": 0.8777416944503784, "step": 6639 }, { "epoch": 8.14723926380368, "grad_norm": 0.3028993308544159, "learning_rate": 3.401259322757481e-05, "loss": 0.6895066499710083, "step": 6640 }, { "epoch": 8.148466257668712, "grad_norm": 0.3246992826461792, "learning_rate": 3.400791138543185e-05, "loss": 0.8227584362030029, "step": 6641 }, { "epoch": 8.149693251533742, "grad_norm": 0.2566029131412506, "learning_rate": 3.400322918020568e-05, "loss": 0.8887863755226135, "step": 6642 }, { "epoch": 8.150920245398773, "grad_norm": 0.3321414291858673, "learning_rate": 3.399854661208502e-05, "loss": 0.7635719776153564, "step": 6643 }, { "epoch": 8.152147239263805, "grad_norm": 0.2647212743759155, "learning_rate": 3.399386368125861e-05, "loss": 0.8291512131690979, "step": 6644 }, { "epoch": 8.153374233128835, "grad_norm": 0.23524516820907593, "learning_rate": 3.39891803879152e-05, "loss": 0.8222558498382568, "step": 6645 }, { "epoch": 8.154601226993865, "grad_norm": 0.2692973017692566, "learning_rate": 3.398449673224357e-05, "loss": 0.9141378402709961, "step": 6646 }, { "epoch": 8.155828220858895, "grad_norm": 0.20636776089668274, "learning_rate": 3.39798127144325e-05, "loss": 0.8083581924438477, "step": 6647 }, { "epoch": 8.157055214723927, "grad_norm": 0.25485461950302124, "learning_rate": 3.397512833467079e-05, "loss": 0.760316014289856, "step": 6648 }, { "epoch": 8.158282208588957, "grad_norm": 0.23966623842716217, "learning_rate": 3.3970443593147256e-05, "loss": 0.7560374736785889, "step": 6649 }, { "epoch": 8.159509202453988, "grad_norm": 0.34690725803375244, "learning_rate": 3.3965758490050726e-05, "loss": 0.4720292091369629, "step": 6650 }, { "epoch": 8.160736196319018, "grad_norm": 0.27896353602409363, "learning_rate": 3.396107302557004e-05, "loss": 0.6839118003845215, "step": 6651 }, { "epoch": 8.16196319018405, "grad_norm": 0.3209441006183624, "learning_rate": 3.395638719989406e-05, "loss": 0.4895676374435425, "step": 6652 }, { "epoch": 8.16319018404908, "grad_norm": 0.27544477581977844, "learning_rate": 3.395170101321166e-05, "loss": 0.8342794179916382, "step": 6653 }, { "epoch": 8.16441717791411, "grad_norm": 0.2800379991531372, "learning_rate": 3.3947014465711715e-05, "loss": 0.5185465812683105, "step": 6654 }, { "epoch": 8.16564417177914, "grad_norm": 0.24919946491718292, "learning_rate": 3.3942327557583143e-05, "loss": 0.7645077705383301, "step": 6655 }, { "epoch": 8.166871165644173, "grad_norm": 0.2428658902645111, "learning_rate": 3.3937640289014846e-05, "loss": 0.7007678747177124, "step": 6656 }, { "epoch": 8.168098159509203, "grad_norm": 0.2544092833995819, "learning_rate": 3.393295266019576e-05, "loss": 0.8659557104110718, "step": 6657 }, { "epoch": 8.169325153374233, "grad_norm": 0.2418612837791443, "learning_rate": 3.392826467131484e-05, "loss": 0.7178899049758911, "step": 6658 }, { "epoch": 8.170552147239263, "grad_norm": 0.31486719846725464, "learning_rate": 3.3923576322561025e-05, "loss": 0.5858457088470459, "step": 6659 }, { "epoch": 8.171779141104295, "grad_norm": 0.35038408637046814, "learning_rate": 3.39188876141233e-05, "loss": 0.6764826774597168, "step": 6660 }, { "epoch": 8.173006134969325, "grad_norm": 0.29026973247528076, "learning_rate": 3.391419854619065e-05, "loss": 0.46267133951187134, "step": 6661 }, { "epoch": 8.174233128834356, "grad_norm": 0.22563758492469788, "learning_rate": 3.3909509118952085e-05, "loss": 0.6735107898712158, "step": 6662 }, { "epoch": 8.175460122699386, "grad_norm": 0.21881119906902313, "learning_rate": 3.3904819332596614e-05, "loss": 0.836776614189148, "step": 6663 }, { "epoch": 8.176687116564418, "grad_norm": 0.3529660701751709, "learning_rate": 3.3900129187313276e-05, "loss": 0.6240255832672119, "step": 6664 }, { "epoch": 8.177914110429448, "grad_norm": 0.34090399742126465, "learning_rate": 3.389543868329112e-05, "loss": 0.509244441986084, "step": 6665 }, { "epoch": 8.179141104294478, "grad_norm": 0.2559433579444885, "learning_rate": 3.38907478207192e-05, "loss": 0.730571448802948, "step": 6666 }, { "epoch": 8.180368098159509, "grad_norm": 0.32453688979148865, "learning_rate": 3.3886056599786585e-05, "loss": 0.44011610746383667, "step": 6667 }, { "epoch": 8.18159509202454, "grad_norm": 0.23389744758605957, "learning_rate": 3.388136502068238e-05, "loss": 0.7752481698989868, "step": 6668 }, { "epoch": 8.18282208588957, "grad_norm": 0.28068381547927856, "learning_rate": 3.387667308359568e-05, "loss": 0.6093523502349854, "step": 6669 }, { "epoch": 8.184049079754601, "grad_norm": 0.321161150932312, "learning_rate": 3.38719807887156e-05, "loss": 0.5488892793655396, "step": 6670 }, { "epoch": 8.185276073619631, "grad_norm": 0.26683956384658813, "learning_rate": 3.386728813623128e-05, "loss": 0.7163190841674805, "step": 6671 }, { "epoch": 8.186503067484663, "grad_norm": 0.297057181596756, "learning_rate": 3.3862595126331864e-05, "loss": 0.7620301246643066, "step": 6672 }, { "epoch": 8.187730061349694, "grad_norm": 0.24316824972629547, "learning_rate": 3.3857901759206516e-05, "loss": 0.8720459342002869, "step": 6673 }, { "epoch": 8.188957055214724, "grad_norm": 0.283187597990036, "learning_rate": 3.3853208035044414e-05, "loss": 0.6640288829803467, "step": 6674 }, { "epoch": 8.190184049079754, "grad_norm": 0.2387113869190216, "learning_rate": 3.3848513954034745e-05, "loss": 0.6544015407562256, "step": 6675 }, { "epoch": 8.191411042944786, "grad_norm": 0.28371626138687134, "learning_rate": 3.3843819516366716e-05, "loss": 0.7255145311355591, "step": 6676 }, { "epoch": 8.192638036809816, "grad_norm": 0.21081945300102234, "learning_rate": 3.3839124722229546e-05, "loss": 0.7347935438156128, "step": 6677 }, { "epoch": 8.193865030674846, "grad_norm": 0.29744136333465576, "learning_rate": 3.383442957181247e-05, "loss": 0.6164412498474121, "step": 6678 }, { "epoch": 8.195092024539877, "grad_norm": 0.36132127046585083, "learning_rate": 3.382973406530473e-05, "loss": 0.5172337293624878, "step": 6679 }, { "epoch": 8.196319018404909, "grad_norm": 0.2752278745174408, "learning_rate": 3.3825038202895595e-05, "loss": 0.6743948459625244, "step": 6680 }, { "epoch": 8.197546012269939, "grad_norm": 0.28938743472099304, "learning_rate": 3.382034198477434e-05, "loss": 0.38466745615005493, "step": 6681 }, { "epoch": 8.198773006134969, "grad_norm": 0.26080718636512756, "learning_rate": 3.381564541113026e-05, "loss": 0.6153415441513062, "step": 6682 }, { "epoch": 8.2, "grad_norm": 0.29457148909568787, "learning_rate": 3.381094848215265e-05, "loss": 0.8736774921417236, "step": 6683 }, { "epoch": 8.201226993865031, "grad_norm": 0.28473421931266785, "learning_rate": 3.380625119803084e-05, "loss": 0.6859354972839355, "step": 6684 }, { "epoch": 8.202453987730062, "grad_norm": 0.2668801546096802, "learning_rate": 3.380155355895416e-05, "loss": 0.5473387241363525, "step": 6685 }, { "epoch": 8.203680981595092, "grad_norm": 0.3065069615840912, "learning_rate": 3.3796855565111954e-05, "loss": 0.7035098075866699, "step": 6686 }, { "epoch": 8.204907975460122, "grad_norm": 0.24748067557811737, "learning_rate": 3.37921572166936e-05, "loss": 0.8552939891815186, "step": 6687 }, { "epoch": 8.206134969325154, "grad_norm": 0.2738414406776428, "learning_rate": 3.378745851388846e-05, "loss": 0.6387930512428284, "step": 6688 }, { "epoch": 8.207361963190184, "grad_norm": 0.2634319067001343, "learning_rate": 3.3782759456885925e-05, "loss": 0.6861991286277771, "step": 6689 }, { "epoch": 8.208588957055214, "grad_norm": 0.28510305285453796, "learning_rate": 3.377806004587541e-05, "loss": 0.49792206287384033, "step": 6690 }, { "epoch": 8.209815950920245, "grad_norm": 0.31293758749961853, "learning_rate": 3.3773360281046326e-05, "loss": 0.7062374353408813, "step": 6691 }, { "epoch": 8.211042944785277, "grad_norm": 0.26503121852874756, "learning_rate": 3.3768660162588115e-05, "loss": 0.6734908819198608, "step": 6692 }, { "epoch": 8.212269938650307, "grad_norm": 0.3022463619709015, "learning_rate": 3.376395969069023e-05, "loss": 0.6019403338432312, "step": 6693 }, { "epoch": 8.213496932515337, "grad_norm": 0.3118428587913513, "learning_rate": 3.375925886554211e-05, "loss": 0.5858493447303772, "step": 6694 }, { "epoch": 8.214723926380367, "grad_norm": 0.2695325016975403, "learning_rate": 3.375455768733325e-05, "loss": 0.6455275416374207, "step": 6695 }, { "epoch": 8.2159509202454, "grad_norm": 0.21507276594638824, "learning_rate": 3.374985615625315e-05, "loss": 0.5320727825164795, "step": 6696 }, { "epoch": 8.21717791411043, "grad_norm": 0.31556904315948486, "learning_rate": 3.3745154272491296e-05, "loss": 0.6977963447570801, "step": 6697 }, { "epoch": 8.21840490797546, "grad_norm": 0.2792830467224121, "learning_rate": 3.374045203623722e-05, "loss": 0.5416560769081116, "step": 6698 }, { "epoch": 8.21963190184049, "grad_norm": 0.25384122133255005, "learning_rate": 3.3735749447680443e-05, "loss": 0.7534731030464172, "step": 6699 }, { "epoch": 8.220858895705522, "grad_norm": 0.2785661220550537, "learning_rate": 3.373104650701052e-05, "loss": 0.7098309993743896, "step": 6700 }, { "epoch": 8.222085889570552, "grad_norm": 0.2987443804740906, "learning_rate": 3.372634321441702e-05, "loss": 0.49836230278015137, "step": 6701 }, { "epoch": 8.223312883435582, "grad_norm": 0.26510941982269287, "learning_rate": 3.3721639570089514e-05, "loss": 0.8190200924873352, "step": 6702 }, { "epoch": 8.224539877300613, "grad_norm": 0.2957535684108734, "learning_rate": 3.371693557421759e-05, "loss": 0.5897320508956909, "step": 6703 }, { "epoch": 8.225766871165645, "grad_norm": 0.3624507188796997, "learning_rate": 3.371223122699086e-05, "loss": 0.5057144165039062, "step": 6704 }, { "epoch": 8.226993865030675, "grad_norm": 0.2715045213699341, "learning_rate": 3.370752652859892e-05, "loss": 0.540464460849762, "step": 6705 }, { "epoch": 8.228220858895705, "grad_norm": 0.27620163559913635, "learning_rate": 3.370282147923144e-05, "loss": 0.5754119753837585, "step": 6706 }, { "epoch": 8.229447852760735, "grad_norm": 0.3391451835632324, "learning_rate": 3.369811607907803e-05, "loss": 0.7687881588935852, "step": 6707 }, { "epoch": 8.230674846625767, "grad_norm": 0.2706877887248993, "learning_rate": 3.369341032832838e-05, "loss": 0.657241940498352, "step": 6708 }, { "epoch": 8.231901840490798, "grad_norm": 0.3145773410797119, "learning_rate": 3.3688704227172155e-05, "loss": 0.6072114706039429, "step": 6709 }, { "epoch": 8.233128834355828, "grad_norm": 0.29265207052230835, "learning_rate": 3.3683997775799045e-05, "loss": 0.7441777586936951, "step": 6710 }, { "epoch": 8.23435582822086, "grad_norm": 0.2914598286151886, "learning_rate": 3.3679290974398744e-05, "loss": 0.70276939868927, "step": 6711 }, { "epoch": 8.23558282208589, "grad_norm": 0.2009677290916443, "learning_rate": 3.367458382316098e-05, "loss": 0.8174616098403931, "step": 6712 }, { "epoch": 8.23680981595092, "grad_norm": 0.26276350021362305, "learning_rate": 3.3669876322275486e-05, "loss": 0.6643102765083313, "step": 6713 }, { "epoch": 8.23803680981595, "grad_norm": 0.23545657098293304, "learning_rate": 3.3665168471932006e-05, "loss": 0.7230631113052368, "step": 6714 }, { "epoch": 8.239263803680982, "grad_norm": 0.2666560113430023, "learning_rate": 3.366046027232029e-05, "loss": 0.7055518627166748, "step": 6715 }, { "epoch": 8.240490797546013, "grad_norm": 0.2837376892566681, "learning_rate": 3.365575172363013e-05, "loss": 0.6405972242355347, "step": 6716 }, { "epoch": 8.241717791411043, "grad_norm": 0.2613152265548706, "learning_rate": 3.36510428260513e-05, "loss": 0.630521833896637, "step": 6717 }, { "epoch": 8.242944785276073, "grad_norm": 0.2854495644569397, "learning_rate": 3.3646333579773616e-05, "loss": 0.5794699192047119, "step": 6718 }, { "epoch": 8.244171779141105, "grad_norm": 0.1975889503955841, "learning_rate": 3.3641623984986875e-05, "loss": 0.8956570029258728, "step": 6719 }, { "epoch": 8.245398773006135, "grad_norm": 0.31932854652404785, "learning_rate": 3.3636914041880915e-05, "loss": 0.4092731475830078, "step": 6720 }, { "epoch": 8.246625766871166, "grad_norm": 0.2671903967857361, "learning_rate": 3.36322037506456e-05, "loss": 0.606869101524353, "step": 6721 }, { "epoch": 8.247852760736196, "grad_norm": 0.24527034163475037, "learning_rate": 3.362749311147075e-05, "loss": 0.6365504860877991, "step": 6722 }, { "epoch": 8.249079754601228, "grad_norm": 0.3137625455856323, "learning_rate": 3.3622782124546285e-05, "loss": 0.6523054838180542, "step": 6723 }, { "epoch": 8.250306748466258, "grad_norm": 0.29064565896987915, "learning_rate": 3.361807079006205e-05, "loss": 0.6229877471923828, "step": 6724 }, { "epoch": 8.251533742331288, "grad_norm": 0.29055747389793396, "learning_rate": 3.361335910820795e-05, "loss": 0.7541533708572388, "step": 6725 }, { "epoch": 8.252760736196318, "grad_norm": 0.2957076132297516, "learning_rate": 3.360864707917393e-05, "loss": 0.6345815658569336, "step": 6726 }, { "epoch": 8.25398773006135, "grad_norm": 0.2612021267414093, "learning_rate": 3.3603934703149885e-05, "loss": 0.6295641660690308, "step": 6727 }, { "epoch": 8.25521472392638, "grad_norm": 0.26033857464790344, "learning_rate": 3.359922198032578e-05, "loss": 0.6126590371131897, "step": 6728 }, { "epoch": 8.256441717791411, "grad_norm": 0.2206065058708191, "learning_rate": 3.359450891089156e-05, "loss": 0.7174235582351685, "step": 6729 }, { "epoch": 8.257668711656441, "grad_norm": 0.2535147964954376, "learning_rate": 3.35897954950372e-05, "loss": 0.6129845380783081, "step": 6730 }, { "epoch": 8.258895705521473, "grad_norm": 0.2957465648651123, "learning_rate": 3.358508173295268e-05, "loss": 0.649824857711792, "step": 6731 }, { "epoch": 8.260122699386503, "grad_norm": 0.22297504544258118, "learning_rate": 3.358036762482799e-05, "loss": 0.7289206981658936, "step": 6732 }, { "epoch": 8.261349693251534, "grad_norm": 0.29581353068351746, "learning_rate": 3.3575653170853175e-05, "loss": 0.7422802448272705, "step": 6733 }, { "epoch": 8.262576687116564, "grad_norm": 0.29517465829849243, "learning_rate": 3.357093837121823e-05, "loss": 0.6394022703170776, "step": 6734 }, { "epoch": 8.263803680981596, "grad_norm": 0.2566796541213989, "learning_rate": 3.35662232261132e-05, "loss": 0.6964529752731323, "step": 6735 }, { "epoch": 8.265030674846626, "grad_norm": 0.25200584530830383, "learning_rate": 3.356150773572815e-05, "loss": 0.5833248496055603, "step": 6736 }, { "epoch": 8.266257668711656, "grad_norm": 0.2810535728931427, "learning_rate": 3.355679190025314e-05, "loss": 0.6545103192329407, "step": 6737 }, { "epoch": 8.267484662576686, "grad_norm": 0.29442888498306274, "learning_rate": 3.3552075719878243e-05, "loss": 0.5332615375518799, "step": 6738 }, { "epoch": 8.268711656441718, "grad_norm": 0.26995614171028137, "learning_rate": 3.354735919479357e-05, "loss": 0.6369485855102539, "step": 6739 }, { "epoch": 8.269938650306749, "grad_norm": 0.25775477290153503, "learning_rate": 3.354264232518923e-05, "loss": 0.548663854598999, "step": 6740 }, { "epoch": 8.271165644171779, "grad_norm": 0.25401246547698975, "learning_rate": 3.353792511125534e-05, "loss": 0.7659890651702881, "step": 6741 }, { "epoch": 8.27239263803681, "grad_norm": 0.24745619297027588, "learning_rate": 3.3533207553182044e-05, "loss": 0.7075332403182983, "step": 6742 }, { "epoch": 8.273619631901841, "grad_norm": 0.25108394026756287, "learning_rate": 3.352848965115949e-05, "loss": 0.8029298782348633, "step": 6743 }, { "epoch": 8.274846625766871, "grad_norm": 0.2541029751300812, "learning_rate": 3.352377140537783e-05, "loss": 0.7464125156402588, "step": 6744 }, { "epoch": 8.276073619631902, "grad_norm": 0.30866512656211853, "learning_rate": 3.351905281602727e-05, "loss": 0.6466604471206665, "step": 6745 }, { "epoch": 8.277300613496932, "grad_norm": 0.353982537984848, "learning_rate": 3.3514333883297974e-05, "loss": 0.5091512799263, "step": 6746 }, { "epoch": 8.278527607361964, "grad_norm": 0.21639962494373322, "learning_rate": 3.350961460738018e-05, "loss": 0.8471344709396362, "step": 6747 }, { "epoch": 8.279754601226994, "grad_norm": 0.2628757357597351, "learning_rate": 3.350489498846407e-05, "loss": 0.7880324125289917, "step": 6748 }, { "epoch": 8.280981595092024, "grad_norm": 0.2695786654949188, "learning_rate": 3.3500175026739915e-05, "loss": 0.613472580909729, "step": 6749 }, { "epoch": 8.282208588957054, "grad_norm": 0.301096647977829, "learning_rate": 3.349545472239794e-05, "loss": 0.6533947587013245, "step": 6750 }, { "epoch": 8.283435582822086, "grad_norm": 0.27605298161506653, "learning_rate": 3.349073407562842e-05, "loss": 0.6943022012710571, "step": 6751 }, { "epoch": 8.284662576687117, "grad_norm": 0.2897704839706421, "learning_rate": 3.348601308662163e-05, "loss": 0.7027098536491394, "step": 6752 }, { "epoch": 8.285889570552147, "grad_norm": 0.2346908152103424, "learning_rate": 3.3481291755567846e-05, "loss": 0.5926859378814697, "step": 6753 }, { "epoch": 8.287116564417177, "grad_norm": 0.25058919191360474, "learning_rate": 3.347657008265738e-05, "loss": 0.6531057357788086, "step": 6754 }, { "epoch": 8.28834355828221, "grad_norm": 0.21980318427085876, "learning_rate": 3.347184806808056e-05, "loss": 0.6474206447601318, "step": 6755 }, { "epoch": 8.28957055214724, "grad_norm": 0.2981858551502228, "learning_rate": 3.3467125712027705e-05, "loss": 0.8117624521255493, "step": 6756 }, { "epoch": 8.29079754601227, "grad_norm": 0.2905086874961853, "learning_rate": 3.346240301468916e-05, "loss": 0.6294680833816528, "step": 6757 }, { "epoch": 8.2920245398773, "grad_norm": 0.22458559274673462, "learning_rate": 3.3457679976255276e-05, "loss": 0.7712815403938293, "step": 6758 }, { "epoch": 8.293251533742332, "grad_norm": 0.28259649872779846, "learning_rate": 3.345295659691644e-05, "loss": 0.7664395570755005, "step": 6759 }, { "epoch": 8.294478527607362, "grad_norm": 0.29129260778427124, "learning_rate": 3.344823287686303e-05, "loss": 0.7868725657463074, "step": 6760 }, { "epoch": 8.295705521472392, "grad_norm": 0.31464099884033203, "learning_rate": 3.344350881628545e-05, "loss": 0.6525150537490845, "step": 6761 }, { "epoch": 8.296932515337422, "grad_norm": 0.26218295097351074, "learning_rate": 3.343878441537412e-05, "loss": 0.7862377166748047, "step": 6762 }, { "epoch": 8.298159509202454, "grad_norm": 0.22747771441936493, "learning_rate": 3.343405967431945e-05, "loss": 0.6638200283050537, "step": 6763 }, { "epoch": 8.299386503067485, "grad_norm": 0.2692565321922302, "learning_rate": 3.342933459331189e-05, "loss": 0.6387921571731567, "step": 6764 }, { "epoch": 8.300613496932515, "grad_norm": 0.27263060212135315, "learning_rate": 3.3424609172541895e-05, "loss": 0.7229951620101929, "step": 6765 }, { "epoch": 8.301840490797545, "grad_norm": 0.270004004240036, "learning_rate": 3.3419883412199926e-05, "loss": 0.6929746866226196, "step": 6766 }, { "epoch": 8.303067484662577, "grad_norm": 0.2625068426132202, "learning_rate": 3.341515731247648e-05, "loss": 0.768531084060669, "step": 6767 }, { "epoch": 8.304294478527607, "grad_norm": 0.2736439108848572, "learning_rate": 3.3410430873562036e-05, "loss": 0.637697696685791, "step": 6768 }, { "epoch": 8.305521472392638, "grad_norm": 0.270616739988327, "learning_rate": 3.3405704095647125e-05, "loss": 0.7321552634239197, "step": 6769 }, { "epoch": 8.30674846625767, "grad_norm": 0.24004541337490082, "learning_rate": 3.340097697892224e-05, "loss": 0.7638356685638428, "step": 6770 }, { "epoch": 8.3079754601227, "grad_norm": 0.41196268796920776, "learning_rate": 3.339624952357794e-05, "loss": 0.7601132392883301, "step": 6771 }, { "epoch": 8.30920245398773, "grad_norm": 0.24975253641605377, "learning_rate": 3.339152172980476e-05, "loss": 0.6897958517074585, "step": 6772 }, { "epoch": 8.31042944785276, "grad_norm": 0.29166606068611145, "learning_rate": 3.338679359779328e-05, "loss": 0.562045156955719, "step": 6773 }, { "epoch": 8.31165644171779, "grad_norm": 0.29071304202079773, "learning_rate": 3.338206512773407e-05, "loss": 0.5888400077819824, "step": 6774 }, { "epoch": 8.312883435582823, "grad_norm": 0.31821712851524353, "learning_rate": 3.337733631981772e-05, "loss": 0.7293884754180908, "step": 6775 }, { "epoch": 8.314110429447853, "grad_norm": 0.25855782628059387, "learning_rate": 3.337260717423484e-05, "loss": 0.646186351776123, "step": 6776 }, { "epoch": 8.315337423312883, "grad_norm": 0.2931901216506958, "learning_rate": 3.336787769117604e-05, "loss": 0.6815733313560486, "step": 6777 }, { "epoch": 8.316564417177915, "grad_norm": 0.3804801106452942, "learning_rate": 3.336314787083195e-05, "loss": 0.7116038799285889, "step": 6778 }, { "epoch": 8.317791411042945, "grad_norm": 0.27207037806510925, "learning_rate": 3.335841771339323e-05, "loss": 0.7616795301437378, "step": 6779 }, { "epoch": 8.319018404907975, "grad_norm": 0.27775654196739197, "learning_rate": 3.335368721905053e-05, "loss": 0.7374311685562134, "step": 6780 }, { "epoch": 8.320245398773006, "grad_norm": 0.2957114577293396, "learning_rate": 3.334895638799452e-05, "loss": 0.6623249053955078, "step": 6781 }, { "epoch": 8.321472392638038, "grad_norm": 0.2816113829612732, "learning_rate": 3.3344225220415896e-05, "loss": 0.7244608402252197, "step": 6782 }, { "epoch": 8.322699386503068, "grad_norm": 0.2632993161678314, "learning_rate": 3.333949371650535e-05, "loss": 0.5975291132926941, "step": 6783 }, { "epoch": 8.323926380368098, "grad_norm": 0.3016703128814697, "learning_rate": 3.333476187645359e-05, "loss": 0.6316275596618652, "step": 6784 }, { "epoch": 8.325153374233128, "grad_norm": 0.32643774151802063, "learning_rate": 3.3330029700451355e-05, "loss": 0.5141740441322327, "step": 6785 }, { "epoch": 8.32638036809816, "grad_norm": 0.2884974479675293, "learning_rate": 3.3325297188689385e-05, "loss": 0.606253445148468, "step": 6786 }, { "epoch": 8.32760736196319, "grad_norm": 0.2539242208003998, "learning_rate": 3.332056434135843e-05, "loss": 0.6872765421867371, "step": 6787 }, { "epoch": 8.32883435582822, "grad_norm": 0.27653801441192627, "learning_rate": 3.331583115864925e-05, "loss": 0.6303043365478516, "step": 6788 }, { "epoch": 8.330061349693251, "grad_norm": 0.2557709217071533, "learning_rate": 3.331109764075264e-05, "loss": 0.6191487312316895, "step": 6789 }, { "epoch": 8.331288343558283, "grad_norm": 0.2664782702922821, "learning_rate": 3.3306363787859384e-05, "loss": 0.5458509922027588, "step": 6790 }, { "epoch": 8.332515337423313, "grad_norm": 0.287351131439209, "learning_rate": 3.33016296001603e-05, "loss": 0.7716677784919739, "step": 6791 }, { "epoch": 8.333742331288343, "grad_norm": 0.30675700306892395, "learning_rate": 3.329689507784619e-05, "loss": 0.6368848085403442, "step": 6792 }, { "epoch": 8.334969325153374, "grad_norm": 0.2828806936740875, "learning_rate": 3.329216022110792e-05, "loss": 0.629472017288208, "step": 6793 }, { "epoch": 8.336196319018406, "grad_norm": 0.2736596465110779, "learning_rate": 3.328742503013631e-05, "loss": 0.6716510057449341, "step": 6794 }, { "epoch": 8.337423312883436, "grad_norm": 0.2309499830007553, "learning_rate": 3.328268950512224e-05, "loss": 0.6355172991752625, "step": 6795 }, { "epoch": 8.338650306748466, "grad_norm": 0.24795493483543396, "learning_rate": 3.3277953646256574e-05, "loss": 0.6554034948348999, "step": 6796 }, { "epoch": 8.339877300613496, "grad_norm": 0.23664608597755432, "learning_rate": 3.327321745373021e-05, "loss": 0.9044806957244873, "step": 6797 }, { "epoch": 8.341104294478528, "grad_norm": 0.26504820585250854, "learning_rate": 3.3268480927734046e-05, "loss": 0.7116742134094238, "step": 6798 }, { "epoch": 8.342331288343559, "grad_norm": 0.28438249230384827, "learning_rate": 3.3263744068459e-05, "loss": 0.8381179571151733, "step": 6799 }, { "epoch": 8.343558282208589, "grad_norm": 0.28108111023902893, "learning_rate": 3.3259006876095994e-05, "loss": 0.6792932748794556, "step": 6800 }, { "epoch": 8.344785276073619, "grad_norm": 0.2519056797027588, "learning_rate": 3.325426935083598e-05, "loss": 0.8719074726104736, "step": 6801 }, { "epoch": 8.346012269938651, "grad_norm": 0.2687361538410187, "learning_rate": 3.3249531492869904e-05, "loss": 0.7779568433761597, "step": 6802 }, { "epoch": 8.347239263803681, "grad_norm": 0.32046428322792053, "learning_rate": 3.3244793302388754e-05, "loss": 0.6888095140457153, "step": 6803 }, { "epoch": 8.348466257668711, "grad_norm": 0.20756666362285614, "learning_rate": 3.324005477958349e-05, "loss": 0.6688015460968018, "step": 6804 }, { "epoch": 8.349693251533742, "grad_norm": 0.23966442048549652, "learning_rate": 3.3235315924645124e-05, "loss": 0.8277756571769714, "step": 6805 }, { "epoch": 8.350920245398774, "grad_norm": 0.31466835737228394, "learning_rate": 3.3230576737764654e-05, "loss": 0.637997031211853, "step": 6806 }, { "epoch": 8.352147239263804, "grad_norm": 0.2582821249961853, "learning_rate": 3.322583721913311e-05, "loss": 0.704329252243042, "step": 6807 }, { "epoch": 8.353374233128834, "grad_norm": 0.26726529002189636, "learning_rate": 3.322109736894154e-05, "loss": 0.643454909324646, "step": 6808 }, { "epoch": 8.354601226993864, "grad_norm": 0.33082669973373413, "learning_rate": 3.321635718738098e-05, "loss": 0.6030884981155396, "step": 6809 }, { "epoch": 8.355828220858896, "grad_norm": 0.2626780867576599, "learning_rate": 3.32116166746425e-05, "loss": 0.697603702545166, "step": 6810 }, { "epoch": 8.357055214723927, "grad_norm": 0.2608177065849304, "learning_rate": 3.320687583091716e-05, "loss": 0.5665246844291687, "step": 6811 }, { "epoch": 8.358282208588957, "grad_norm": 0.2489113211631775, "learning_rate": 3.3202134656396065e-05, "loss": 0.8280107975006104, "step": 6812 }, { "epoch": 8.359509202453987, "grad_norm": 0.268144816160202, "learning_rate": 3.319739315127032e-05, "loss": 0.5545775890350342, "step": 6813 }, { "epoch": 8.360736196319019, "grad_norm": 0.2958994507789612, "learning_rate": 3.3192651315731034e-05, "loss": 0.6225358247756958, "step": 6814 }, { "epoch": 8.36196319018405, "grad_norm": 0.3209339678287506, "learning_rate": 3.3187909149969344e-05, "loss": 0.6197903752326965, "step": 6815 }, { "epoch": 8.36319018404908, "grad_norm": 0.3204484283924103, "learning_rate": 3.318316665417638e-05, "loss": 0.40991640090942383, "step": 6816 }, { "epoch": 8.36441717791411, "grad_norm": 0.2489742785692215, "learning_rate": 3.3178423828543306e-05, "loss": 0.756980299949646, "step": 6817 }, { "epoch": 8.365644171779142, "grad_norm": 0.26476675271987915, "learning_rate": 3.31736806732613e-05, "loss": 0.757454514503479, "step": 6818 }, { "epoch": 8.366871165644172, "grad_norm": 0.2688039541244507, "learning_rate": 3.3168937188521536e-05, "loss": 0.7922211289405823, "step": 6819 }, { "epoch": 8.368098159509202, "grad_norm": 0.23767834901809692, "learning_rate": 3.3164193374515215e-05, "loss": 0.5600321292877197, "step": 6820 }, { "epoch": 8.369325153374232, "grad_norm": 0.19961827993392944, "learning_rate": 3.3159449231433545e-05, "loss": 0.8654745221138, "step": 6821 }, { "epoch": 8.370552147239264, "grad_norm": 0.2420952320098877, "learning_rate": 3.3154704759467746e-05, "loss": 0.7009106278419495, "step": 6822 }, { "epoch": 8.371779141104295, "grad_norm": 0.26671263575553894, "learning_rate": 3.314995995880905e-05, "loss": 0.6385812163352966, "step": 6823 }, { "epoch": 8.373006134969325, "grad_norm": 0.31518450379371643, "learning_rate": 3.314521482964872e-05, "loss": 0.6697258949279785, "step": 6824 }, { "epoch": 8.374233128834355, "grad_norm": 0.26363980770111084, "learning_rate": 3.314046937217801e-05, "loss": 0.6569100022315979, "step": 6825 }, { "epoch": 8.375460122699387, "grad_norm": 0.24223342537879944, "learning_rate": 3.31357235865882e-05, "loss": 0.8206428289413452, "step": 6826 }, { "epoch": 8.376687116564417, "grad_norm": 0.23969845473766327, "learning_rate": 3.3130977473070566e-05, "loss": 0.597691535949707, "step": 6827 }, { "epoch": 8.377914110429447, "grad_norm": 0.2871536314487457, "learning_rate": 3.312623103181642e-05, "loss": 0.6584419012069702, "step": 6828 }, { "epoch": 8.379141104294478, "grad_norm": 0.2572406828403473, "learning_rate": 3.3121484263017094e-05, "loss": 0.6744881272315979, "step": 6829 }, { "epoch": 8.38036809815951, "grad_norm": 0.24224643409252167, "learning_rate": 3.311673716686389e-05, "loss": 0.784794270992279, "step": 6830 }, { "epoch": 8.38159509202454, "grad_norm": 0.3483116328716278, "learning_rate": 3.3111989743548155e-05, "loss": 0.5594790577888489, "step": 6831 }, { "epoch": 8.38282208588957, "grad_norm": 0.28668248653411865, "learning_rate": 3.3107241993261265e-05, "loss": 0.7928224205970764, "step": 6832 }, { "epoch": 8.3840490797546, "grad_norm": 0.3720575273036957, "learning_rate": 3.3102493916194554e-05, "loss": 0.6365042328834534, "step": 6833 }, { "epoch": 8.385276073619632, "grad_norm": 0.25506100058555603, "learning_rate": 3.309774551253944e-05, "loss": 0.7701928019523621, "step": 6834 }, { "epoch": 8.386503067484663, "grad_norm": 0.22410224378108978, "learning_rate": 3.309299678248729e-05, "loss": 0.5715579986572266, "step": 6835 }, { "epoch": 8.387730061349693, "grad_norm": 0.26444974541664124, "learning_rate": 3.308824772622952e-05, "loss": 0.6328089237213135, "step": 6836 }, { "epoch": 8.388957055214725, "grad_norm": 0.2561246156692505, "learning_rate": 3.308349834395756e-05, "loss": 0.7717717289924622, "step": 6837 }, { "epoch": 8.390184049079755, "grad_norm": 1.4973291158676147, "learning_rate": 3.3078748635862826e-05, "loss": 0.7149530649185181, "step": 6838 }, { "epoch": 8.391411042944785, "grad_norm": 0.21142350137233734, "learning_rate": 3.3073998602136786e-05, "loss": 0.7479966878890991, "step": 6839 }, { "epoch": 8.392638036809815, "grad_norm": 0.3231164515018463, "learning_rate": 3.306924824297088e-05, "loss": 0.6012017726898193, "step": 6840 }, { "epoch": 8.393865030674847, "grad_norm": 0.2503679394721985, "learning_rate": 3.3064497558556605e-05, "loss": 0.8317670226097107, "step": 6841 }, { "epoch": 8.395092024539878, "grad_norm": 0.2751319706439972, "learning_rate": 3.3059746549085426e-05, "loss": 0.5729683637619019, "step": 6842 }, { "epoch": 8.396319018404908, "grad_norm": 0.2823094427585602, "learning_rate": 3.3054995214748853e-05, "loss": 0.5807999968528748, "step": 6843 }, { "epoch": 8.397546012269938, "grad_norm": 0.26438573002815247, "learning_rate": 3.30502435557384e-05, "loss": 0.9487792253494263, "step": 6844 }, { "epoch": 8.39877300613497, "grad_norm": 0.3420044779777527, "learning_rate": 3.304549157224558e-05, "loss": 0.5838254690170288, "step": 6845 }, { "epoch": 8.4, "grad_norm": 0.3354440927505493, "learning_rate": 3.3040739264461945e-05, "loss": 0.6087503433227539, "step": 6846 }, { "epoch": 8.40122699386503, "grad_norm": 0.278828889131546, "learning_rate": 3.303598663257904e-05, "loss": 0.6895203590393066, "step": 6847 }, { "epoch": 8.40245398773006, "grad_norm": 0.31158581376075745, "learning_rate": 3.303123367678844e-05, "loss": 0.6139966249465942, "step": 6848 }, { "epoch": 8.403680981595093, "grad_norm": 0.2282399982213974, "learning_rate": 3.3026480397281713e-05, "loss": 0.9141048192977905, "step": 6849 }, { "epoch": 8.404907975460123, "grad_norm": 0.2780526876449585, "learning_rate": 3.302172679425045e-05, "loss": 0.9003757238388062, "step": 6850 }, { "epoch": 8.406134969325153, "grad_norm": 0.31946974992752075, "learning_rate": 3.3016972867886264e-05, "loss": 0.5120909214019775, "step": 6851 }, { "epoch": 8.407361963190183, "grad_norm": 0.2758537530899048, "learning_rate": 3.3012218618380765e-05, "loss": 0.699053943157196, "step": 6852 }, { "epoch": 8.408588957055215, "grad_norm": 0.22216282784938812, "learning_rate": 3.300746404592559e-05, "loss": 0.8141501545906067, "step": 6853 }, { "epoch": 8.409815950920246, "grad_norm": 0.32209283113479614, "learning_rate": 3.300270915071237e-05, "loss": 0.6272728443145752, "step": 6854 }, { "epoch": 8.411042944785276, "grad_norm": 0.2735273838043213, "learning_rate": 3.299795393293277e-05, "loss": 0.7291398048400879, "step": 6855 }, { "epoch": 8.412269938650306, "grad_norm": 0.27613046765327454, "learning_rate": 3.2993198392778464e-05, "loss": 0.6279063820838928, "step": 6856 }, { "epoch": 8.413496932515338, "grad_norm": 0.32797035574913025, "learning_rate": 3.298844253044112e-05, "loss": 0.5688736438751221, "step": 6857 }, { "epoch": 8.414723926380368, "grad_norm": 0.2656906843185425, "learning_rate": 3.2983686346112444e-05, "loss": 0.6109166145324707, "step": 6858 }, { "epoch": 8.415950920245399, "grad_norm": 0.27523675560951233, "learning_rate": 3.297892983998414e-05, "loss": 0.583909273147583, "step": 6859 }, { "epoch": 8.417177914110429, "grad_norm": 0.3137913942337036, "learning_rate": 3.297417301224793e-05, "loss": 0.5436312556266785, "step": 6860 }, { "epoch": 8.41840490797546, "grad_norm": 0.3696512281894684, "learning_rate": 3.2969415863095556e-05, "loss": 0.5721584558486938, "step": 6861 }, { "epoch": 8.419631901840491, "grad_norm": 0.2358524054288864, "learning_rate": 3.2964658392718754e-05, "loss": 0.711227536201477, "step": 6862 }, { "epoch": 8.420858895705521, "grad_norm": 0.31351014971733093, "learning_rate": 3.295990060130928e-05, "loss": 0.8774079084396362, "step": 6863 }, { "epoch": 8.422085889570551, "grad_norm": 0.3566444218158722, "learning_rate": 3.295514248905893e-05, "loss": 0.4929811358451843, "step": 6864 }, { "epoch": 8.423312883435583, "grad_norm": 0.2720397114753723, "learning_rate": 3.295038405615947e-05, "loss": 0.6345141530036926, "step": 6865 }, { "epoch": 8.424539877300614, "grad_norm": 0.28000783920288086, "learning_rate": 3.29456253028027e-05, "loss": 0.5444214940071106, "step": 6866 }, { "epoch": 8.425766871165644, "grad_norm": 0.26116007566452026, "learning_rate": 3.294086622918043e-05, "loss": 0.7604440450668335, "step": 6867 }, { "epoch": 8.426993865030674, "grad_norm": 0.3315050005912781, "learning_rate": 3.29361068354845e-05, "loss": 0.6169566512107849, "step": 6868 }, { "epoch": 8.428220858895706, "grad_norm": 0.25664687156677246, "learning_rate": 3.293134712190674e-05, "loss": 0.7353286743164062, "step": 6869 }, { "epoch": 8.429447852760736, "grad_norm": 0.2853755056858063, "learning_rate": 3.292658708863899e-05, "loss": 0.5848768949508667, "step": 6870 }, { "epoch": 8.430674846625767, "grad_norm": 0.2910304367542267, "learning_rate": 3.292182673587313e-05, "loss": 0.6378909349441528, "step": 6871 }, { "epoch": 8.431901840490797, "grad_norm": 0.3065175712108612, "learning_rate": 3.291706606380102e-05, "loss": 0.6217737197875977, "step": 6872 }, { "epoch": 8.433128834355829, "grad_norm": 0.3664186894893646, "learning_rate": 3.291230507261456e-05, "loss": 0.5989587306976318, "step": 6873 }, { "epoch": 8.434355828220859, "grad_norm": 0.35173681378364563, "learning_rate": 3.290754376250565e-05, "loss": 0.7335994243621826, "step": 6874 }, { "epoch": 8.43558282208589, "grad_norm": 0.25829946994781494, "learning_rate": 3.290278213366621e-05, "loss": 0.6005051732063293, "step": 6875 }, { "epoch": 8.43680981595092, "grad_norm": 0.2636898159980774, "learning_rate": 3.2898020186288156e-05, "loss": 0.5582277774810791, "step": 6876 }, { "epoch": 8.438036809815952, "grad_norm": 0.2557462751865387, "learning_rate": 3.289325792056343e-05, "loss": 0.7812535762786865, "step": 6877 }, { "epoch": 8.439263803680982, "grad_norm": 0.28639423847198486, "learning_rate": 3.2888495336684e-05, "loss": 0.5415986180305481, "step": 6878 }, { "epoch": 8.440490797546012, "grad_norm": 0.2537548840045929, "learning_rate": 3.288373243484181e-05, "loss": 0.8073728680610657, "step": 6879 }, { "epoch": 8.441717791411042, "grad_norm": 0.2968222200870514, "learning_rate": 3.287896921522886e-05, "loss": 0.5729730129241943, "step": 6880 }, { "epoch": 8.442944785276074, "grad_norm": 0.29733166098594666, "learning_rate": 3.2874205678037126e-05, "loss": 0.5421167016029358, "step": 6881 }, { "epoch": 8.444171779141104, "grad_norm": 0.2674369812011719, "learning_rate": 3.2869441823458624e-05, "loss": 0.6841582655906677, "step": 6882 }, { "epoch": 8.445398773006135, "grad_norm": 0.225435271859169, "learning_rate": 3.286467765168536e-05, "loss": 0.7755117416381836, "step": 6883 }, { "epoch": 8.446625766871165, "grad_norm": 0.3188092112541199, "learning_rate": 3.2859913162909375e-05, "loss": 0.5403130054473877, "step": 6884 }, { "epoch": 8.447852760736197, "grad_norm": 0.2829863131046295, "learning_rate": 3.285514835732271e-05, "loss": 0.5256130695343018, "step": 6885 }, { "epoch": 8.449079754601227, "grad_norm": 0.2630830407142639, "learning_rate": 3.285038323511741e-05, "loss": 0.703068196773529, "step": 6886 }, { "epoch": 8.450306748466257, "grad_norm": 0.31204378604888916, "learning_rate": 3.284561779648556e-05, "loss": 0.6648135781288147, "step": 6887 }, { "epoch": 8.451533742331288, "grad_norm": 0.24742747843265533, "learning_rate": 3.284085204161923e-05, "loss": 0.8074185252189636, "step": 6888 }, { "epoch": 8.45276073619632, "grad_norm": 0.33030959963798523, "learning_rate": 3.283608597071052e-05, "loss": 0.6990956664085388, "step": 6889 }, { "epoch": 8.45398773006135, "grad_norm": 0.2661587595939636, "learning_rate": 3.283131958395153e-05, "loss": 0.8944828510284424, "step": 6890 }, { "epoch": 8.45521472392638, "grad_norm": 0.3094131648540497, "learning_rate": 3.2826552881534385e-05, "loss": 0.705947756767273, "step": 6891 }, { "epoch": 8.45644171779141, "grad_norm": 0.2747354805469513, "learning_rate": 3.2821785863651216e-05, "loss": 0.6078143119812012, "step": 6892 }, { "epoch": 8.457668711656442, "grad_norm": 0.2852465510368347, "learning_rate": 3.2817018530494164e-05, "loss": 0.7485017776489258, "step": 6893 }, { "epoch": 8.458895705521472, "grad_norm": 0.22807703912258148, "learning_rate": 3.281225088225539e-05, "loss": 0.8473002910614014, "step": 6894 }, { "epoch": 8.460122699386503, "grad_norm": 0.29161641001701355, "learning_rate": 3.2807482919127064e-05, "loss": 0.6884573698043823, "step": 6895 }, { "epoch": 8.461349693251535, "grad_norm": 0.35390129685401917, "learning_rate": 3.2802714641301365e-05, "loss": 0.5616928935050964, "step": 6896 }, { "epoch": 8.462576687116565, "grad_norm": 0.23918503522872925, "learning_rate": 3.2797946048970504e-05, "loss": 0.7726519703865051, "step": 6897 }, { "epoch": 8.463803680981595, "grad_norm": 0.2892306447029114, "learning_rate": 3.279317714232667e-05, "loss": 0.6289287805557251, "step": 6898 }, { "epoch": 8.465030674846625, "grad_norm": 0.2631089687347412, "learning_rate": 3.27884079215621e-05, "loss": 0.716208279132843, "step": 6899 }, { "epoch": 8.466257668711656, "grad_norm": 0.3093980848789215, "learning_rate": 3.278363838686902e-05, "loss": 0.6743243932723999, "step": 6900 }, { "epoch": 8.467484662576688, "grad_norm": 0.27946606278419495, "learning_rate": 3.2778868538439666e-05, "loss": 0.6573041677474976, "step": 6901 }, { "epoch": 8.468711656441718, "grad_norm": 0.2395710051059723, "learning_rate": 3.277409837646632e-05, "loss": 0.7981598377227783, "step": 6902 }, { "epoch": 8.469938650306748, "grad_norm": 0.25345757603645325, "learning_rate": 3.276932790114123e-05, "loss": 0.6642467975616455, "step": 6903 }, { "epoch": 8.47116564417178, "grad_norm": 0.37793728709220886, "learning_rate": 3.2764557112656694e-05, "loss": 0.6030772924423218, "step": 6904 }, { "epoch": 8.47239263803681, "grad_norm": 0.21817874908447266, "learning_rate": 3.2759786011205e-05, "loss": 0.7236943244934082, "step": 6905 }, { "epoch": 8.47361963190184, "grad_norm": 0.3048180043697357, "learning_rate": 3.275501459697847e-05, "loss": 0.5874910354614258, "step": 6906 }, { "epoch": 8.47484662576687, "grad_norm": 0.24081702530384064, "learning_rate": 3.275024287016942e-05, "loss": 0.5872993469238281, "step": 6907 }, { "epoch": 8.476073619631903, "grad_norm": 0.31692859530448914, "learning_rate": 3.2745470830970185e-05, "loss": 0.6735856533050537, "step": 6908 }, { "epoch": 8.477300613496933, "grad_norm": 0.26205894351005554, "learning_rate": 3.274069847957311e-05, "loss": 0.5601888298988342, "step": 6909 }, { "epoch": 8.478527607361963, "grad_norm": 0.2880280315876007, "learning_rate": 3.273592581617056e-05, "loss": 0.6624742746353149, "step": 6910 }, { "epoch": 8.479754601226993, "grad_norm": 0.2944352626800537, "learning_rate": 3.27311528409549e-05, "loss": 0.6080479025840759, "step": 6911 }, { "epoch": 8.480981595092025, "grad_norm": 0.24595209956169128, "learning_rate": 3.272637955411853e-05, "loss": 0.8087565898895264, "step": 6912 }, { "epoch": 8.482208588957056, "grad_norm": 0.3679983913898468, "learning_rate": 3.272160595585382e-05, "loss": 0.6121112108230591, "step": 6913 }, { "epoch": 8.483435582822086, "grad_norm": 0.30279430747032166, "learning_rate": 3.27168320463532e-05, "loss": 0.5766934156417847, "step": 6914 }, { "epoch": 8.484662576687116, "grad_norm": 0.2673328220844269, "learning_rate": 3.27120578258091e-05, "loss": 0.8236984014511108, "step": 6915 }, { "epoch": 8.485889570552148, "grad_norm": 0.32118043303489685, "learning_rate": 3.2707283294413926e-05, "loss": 0.5434558391571045, "step": 6916 }, { "epoch": 8.487116564417178, "grad_norm": 0.6486950516700745, "learning_rate": 3.270250845236016e-05, "loss": 0.7852227687835693, "step": 6917 }, { "epoch": 8.488343558282208, "grad_norm": 0.21856823563575745, "learning_rate": 3.269773329984024e-05, "loss": 0.7257921695709229, "step": 6918 }, { "epoch": 8.489570552147239, "grad_norm": 0.2584337294101715, "learning_rate": 3.269295783704665e-05, "loss": 0.721774160861969, "step": 6919 }, { "epoch": 8.49079754601227, "grad_norm": 0.31678709387779236, "learning_rate": 3.268818206417186e-05, "loss": 0.5488506555557251, "step": 6920 }, { "epoch": 8.4920245398773, "grad_norm": 0.3236719071865082, "learning_rate": 3.268340598140839e-05, "loss": 0.44961321353912354, "step": 6921 }, { "epoch": 8.493251533742331, "grad_norm": 0.3447295129299164, "learning_rate": 3.267862958894873e-05, "loss": 0.6401456594467163, "step": 6922 }, { "epoch": 8.494478527607361, "grad_norm": 0.2541218400001526, "learning_rate": 3.267385288698542e-05, "loss": 0.6410914659500122, "step": 6923 }, { "epoch": 8.495705521472393, "grad_norm": 0.28891587257385254, "learning_rate": 3.266907587571098e-05, "loss": 0.7306896448135376, "step": 6924 }, { "epoch": 8.496932515337424, "grad_norm": 0.2849723696708679, "learning_rate": 3.266429855531797e-05, "loss": 0.6111994981765747, "step": 6925 }, { "epoch": 8.498159509202454, "grad_norm": 0.32464441657066345, "learning_rate": 3.2659520925998945e-05, "loss": 0.5425902605056763, "step": 6926 }, { "epoch": 8.499386503067484, "grad_norm": 0.2896862328052521, "learning_rate": 3.2654742987946466e-05, "loss": 0.8383324146270752, "step": 6927 }, { "epoch": 8.500613496932516, "grad_norm": 0.2821482717990875, "learning_rate": 3.264996474135314e-05, "loss": 0.6589975357055664, "step": 6928 }, { "epoch": 8.501840490797546, "grad_norm": 0.2505096197128296, "learning_rate": 3.264518618641155e-05, "loss": 0.9371036291122437, "step": 6929 }, { "epoch": 8.503067484662576, "grad_norm": 0.27443528175354004, "learning_rate": 3.2640407323314305e-05, "loss": 0.7628813982009888, "step": 6930 }, { "epoch": 8.504294478527607, "grad_norm": 0.3410767614841461, "learning_rate": 3.2635628152254036e-05, "loss": 0.4860408902168274, "step": 6931 }, { "epoch": 8.505521472392639, "grad_norm": 0.26262417435646057, "learning_rate": 3.263084867342338e-05, "loss": 0.7543997764587402, "step": 6932 }, { "epoch": 8.506748466257669, "grad_norm": 0.2998946011066437, "learning_rate": 3.262606888701498e-05, "loss": 0.7307718992233276, "step": 6933 }, { "epoch": 8.5079754601227, "grad_norm": 0.22819097340106964, "learning_rate": 3.262128879322148e-05, "loss": 0.825494110584259, "step": 6934 }, { "epoch": 8.50920245398773, "grad_norm": 0.2580135464668274, "learning_rate": 3.2616508392235564e-05, "loss": 0.7169044613838196, "step": 6935 }, { "epoch": 8.510429447852761, "grad_norm": 0.295399010181427, "learning_rate": 3.261172768424993e-05, "loss": 0.8829184770584106, "step": 6936 }, { "epoch": 8.511656441717792, "grad_norm": 0.3718682825565338, "learning_rate": 3.2606946669457254e-05, "loss": 0.7487002611160278, "step": 6937 }, { "epoch": 8.512883435582822, "grad_norm": 0.2561943233013153, "learning_rate": 3.260216534805026e-05, "loss": 0.747402548789978, "step": 6938 }, { "epoch": 8.514110429447852, "grad_norm": 0.29854094982147217, "learning_rate": 3.259738372022166e-05, "loss": 0.688715934753418, "step": 6939 }, { "epoch": 8.515337423312884, "grad_norm": 0.3148449659347534, "learning_rate": 3.259260178616419e-05, "loss": 0.5525014400482178, "step": 6940 }, { "epoch": 8.516564417177914, "grad_norm": 0.23858000338077545, "learning_rate": 3.25878195460706e-05, "loss": 0.6877691149711609, "step": 6941 }, { "epoch": 8.517791411042944, "grad_norm": 0.3019903600215912, "learning_rate": 3.258303700013364e-05, "loss": 0.6686210036277771, "step": 6942 }, { "epoch": 8.519018404907975, "grad_norm": 0.3441215753555298, "learning_rate": 3.2578254148546086e-05, "loss": 0.700882077217102, "step": 6943 }, { "epoch": 8.520245398773007, "grad_norm": 0.3544115424156189, "learning_rate": 3.257347099150072e-05, "loss": 0.40418124198913574, "step": 6944 }, { "epoch": 8.521472392638037, "grad_norm": 0.29695025086402893, "learning_rate": 3.256868752919034e-05, "loss": 0.787812352180481, "step": 6945 }, { "epoch": 8.522699386503067, "grad_norm": 0.22728899121284485, "learning_rate": 3.2563903761807755e-05, "loss": 0.6755284070968628, "step": 6946 }, { "epoch": 8.523926380368097, "grad_norm": 0.30574554204940796, "learning_rate": 3.255911968954579e-05, "loss": 0.5362036824226379, "step": 6947 }, { "epoch": 8.52515337423313, "grad_norm": 0.27208516001701355, "learning_rate": 3.255433531259726e-05, "loss": 0.6812742352485657, "step": 6948 }, { "epoch": 8.52638036809816, "grad_norm": 0.2075551301240921, "learning_rate": 3.2549550631155014e-05, "loss": 0.6525472402572632, "step": 6949 }, { "epoch": 8.52760736196319, "grad_norm": 0.25861406326293945, "learning_rate": 3.2544765645411924e-05, "loss": 0.553755521774292, "step": 6950 }, { "epoch": 8.52883435582822, "grad_norm": 0.2935419976711273, "learning_rate": 3.253998035556084e-05, "loss": 0.7539310455322266, "step": 6951 }, { "epoch": 8.530061349693252, "grad_norm": 0.3159336447715759, "learning_rate": 3.253519476179466e-05, "loss": 0.7373028993606567, "step": 6952 }, { "epoch": 8.531288343558282, "grad_norm": 0.30200448632240295, "learning_rate": 3.253040886430627e-05, "loss": 0.4588841199874878, "step": 6953 }, { "epoch": 8.532515337423312, "grad_norm": 0.26443052291870117, "learning_rate": 3.2525622663288576e-05, "loss": 0.8286416530609131, "step": 6954 }, { "epoch": 8.533742331288344, "grad_norm": 0.24348165094852448, "learning_rate": 3.252083615893449e-05, "loss": 0.5842919945716858, "step": 6955 }, { "epoch": 8.534969325153375, "grad_norm": 0.3293142020702362, "learning_rate": 3.251604935143696e-05, "loss": 0.564637303352356, "step": 6956 }, { "epoch": 8.536196319018405, "grad_norm": 0.3360094428062439, "learning_rate": 3.251126224098891e-05, "loss": 0.46281543374061584, "step": 6957 }, { "epoch": 8.537423312883435, "grad_norm": 0.31680402159690857, "learning_rate": 3.2506474827783304e-05, "loss": 0.5972644090652466, "step": 6958 }, { "epoch": 8.538650306748465, "grad_norm": 0.28107428550720215, "learning_rate": 3.25016871120131e-05, "loss": 0.6972420811653137, "step": 6959 }, { "epoch": 8.539877300613497, "grad_norm": 0.26016899943351746, "learning_rate": 3.249689909387129e-05, "loss": 0.6959788203239441, "step": 6960 }, { "epoch": 8.541104294478528, "grad_norm": 0.3096902668476105, "learning_rate": 3.2492110773550854e-05, "loss": 0.6755812168121338, "step": 6961 }, { "epoch": 8.542331288343558, "grad_norm": 0.2727472484111786, "learning_rate": 3.2487322151244804e-05, "loss": 0.8023788928985596, "step": 6962 }, { "epoch": 8.54355828220859, "grad_norm": 0.32332953810691833, "learning_rate": 3.248253322714615e-05, "loss": 0.5758371949195862, "step": 6963 }, { "epoch": 8.54478527607362, "grad_norm": 0.26343396306037903, "learning_rate": 3.247774400144793e-05, "loss": 0.5790438652038574, "step": 6964 }, { "epoch": 8.54601226993865, "grad_norm": 0.24694867432117462, "learning_rate": 3.247295447434317e-05, "loss": 0.69917893409729, "step": 6965 }, { "epoch": 8.54723926380368, "grad_norm": 0.3170101046562195, "learning_rate": 3.246816464602494e-05, "loss": 0.4412153959274292, "step": 6966 }, { "epoch": 8.548466257668712, "grad_norm": 0.2847159802913666, "learning_rate": 3.246337451668628e-05, "loss": 0.7866398692131042, "step": 6967 }, { "epoch": 8.549693251533743, "grad_norm": 0.26331591606140137, "learning_rate": 3.245858408652028e-05, "loss": 0.7164682149887085, "step": 6968 }, { "epoch": 8.550920245398773, "grad_norm": 0.22873997688293457, "learning_rate": 3.245379335572003e-05, "loss": 0.628818154335022, "step": 6969 }, { "epoch": 8.552147239263803, "grad_norm": 0.3249559998512268, "learning_rate": 3.2449002324478636e-05, "loss": 0.5962530374526978, "step": 6970 }, { "epoch": 8.553374233128835, "grad_norm": 0.3337686359882355, "learning_rate": 3.24442109929892e-05, "loss": 0.5270641446113586, "step": 6971 }, { "epoch": 8.554601226993865, "grad_norm": 0.24015535414218903, "learning_rate": 3.243941936144485e-05, "loss": 0.7015825510025024, "step": 6972 }, { "epoch": 8.555828220858896, "grad_norm": 0.3246702551841736, "learning_rate": 3.243462743003871e-05, "loss": 0.6179143190383911, "step": 6973 }, { "epoch": 8.557055214723926, "grad_norm": 0.2567159831523895, "learning_rate": 3.242983519896396e-05, "loss": 0.48384082317352295, "step": 6974 }, { "epoch": 8.558282208588958, "grad_norm": 0.2941300570964813, "learning_rate": 3.242504266841373e-05, "loss": 0.5380802750587463, "step": 6975 }, { "epoch": 8.559509202453988, "grad_norm": 0.24786177277565002, "learning_rate": 3.242024983858122e-05, "loss": 0.6360188722610474, "step": 6976 }, { "epoch": 8.560736196319018, "grad_norm": 0.28909724950790405, "learning_rate": 3.24154567096596e-05, "loss": 0.7444230318069458, "step": 6977 }, { "epoch": 8.561963190184048, "grad_norm": 0.30108505487442017, "learning_rate": 3.241066328184207e-05, "loss": 0.616851806640625, "step": 6978 }, { "epoch": 8.56319018404908, "grad_norm": 0.36423107981681824, "learning_rate": 3.240586955532183e-05, "loss": 0.4376094937324524, "step": 6979 }, { "epoch": 8.56441717791411, "grad_norm": 0.254830002784729, "learning_rate": 3.240107553029212e-05, "loss": 0.7609912753105164, "step": 6980 }, { "epoch": 8.565644171779141, "grad_norm": 0.26507800817489624, "learning_rate": 3.2396281206946156e-05, "loss": 0.6401017904281616, "step": 6981 }, { "epoch": 8.566871165644171, "grad_norm": 0.22182145714759827, "learning_rate": 3.23914865854772e-05, "loss": 0.8053158521652222, "step": 6982 }, { "epoch": 8.568098159509203, "grad_norm": 0.28129827976226807, "learning_rate": 3.238669166607849e-05, "loss": 0.7188737392425537, "step": 6983 }, { "epoch": 8.569325153374233, "grad_norm": 0.2542992830276489, "learning_rate": 3.238189644894332e-05, "loss": 0.6957148313522339, "step": 6984 }, { "epoch": 8.570552147239264, "grad_norm": 0.2368539720773697, "learning_rate": 3.2377100934264946e-05, "loss": 0.905001163482666, "step": 6985 }, { "epoch": 8.571779141104294, "grad_norm": 0.2284119427204132, "learning_rate": 3.237230512223668e-05, "loss": 0.8004463911056519, "step": 6986 }, { "epoch": 8.573006134969326, "grad_norm": 0.30151793360710144, "learning_rate": 3.236750901305182e-05, "loss": 0.6408600211143494, "step": 6987 }, { "epoch": 8.574233128834356, "grad_norm": 0.23289458453655243, "learning_rate": 3.236271260690368e-05, "loss": 0.7147018909454346, "step": 6988 }, { "epoch": 8.575460122699386, "grad_norm": 0.2722586393356323, "learning_rate": 3.2357915903985605e-05, "loss": 0.7432997226715088, "step": 6989 }, { "epoch": 8.576687116564417, "grad_norm": 0.273695170879364, "learning_rate": 3.2353118904490915e-05, "loss": 0.6861681938171387, "step": 6990 }, { "epoch": 8.577914110429449, "grad_norm": 0.3354070782661438, "learning_rate": 3.2348321608612974e-05, "loss": 0.6234349012374878, "step": 6991 }, { "epoch": 8.579141104294479, "grad_norm": 0.23118215799331665, "learning_rate": 3.2343524016545154e-05, "loss": 0.8050051927566528, "step": 6992 }, { "epoch": 8.580368098159509, "grad_norm": 0.25984734296798706, "learning_rate": 3.233872612848082e-05, "loss": 0.8534350991249084, "step": 6993 }, { "epoch": 8.58159509202454, "grad_norm": 0.3102632462978363, "learning_rate": 3.2333927944613365e-05, "loss": 0.6439036130905151, "step": 6994 }, { "epoch": 8.582822085889571, "grad_norm": 0.24413904547691345, "learning_rate": 3.232912946513619e-05, "loss": 0.6806849241256714, "step": 6995 }, { "epoch": 8.584049079754601, "grad_norm": 0.28304117918014526, "learning_rate": 3.232433069024272e-05, "loss": 0.6425298452377319, "step": 6996 }, { "epoch": 8.585276073619632, "grad_norm": 0.2741943299770355, "learning_rate": 3.231953162012636e-05, "loss": 0.724738359451294, "step": 6997 }, { "epoch": 8.586503067484662, "grad_norm": 0.24291208386421204, "learning_rate": 3.2314732254980565e-05, "loss": 0.681317150592804, "step": 6998 }, { "epoch": 8.587730061349694, "grad_norm": 0.27551132440567017, "learning_rate": 3.230993259499876e-05, "loss": 0.7975022792816162, "step": 6999 }, { "epoch": 8.588957055214724, "grad_norm": 0.21629011631011963, "learning_rate": 3.230513264037443e-05, "loss": 0.6835694909095764, "step": 7000 }, { "epoch": 8.590184049079754, "grad_norm": 0.2964117228984833, "learning_rate": 3.2300332391301047e-05, "loss": 0.5450855493545532, "step": 7001 }, { "epoch": 8.591411042944785, "grad_norm": 0.2992200255393982, "learning_rate": 3.229553184797207e-05, "loss": 0.7418898940086365, "step": 7002 }, { "epoch": 8.592638036809817, "grad_norm": 0.36426496505737305, "learning_rate": 3.2290731010581024e-05, "loss": 0.5572869777679443, "step": 7003 }, { "epoch": 8.593865030674847, "grad_norm": 0.2613292634487152, "learning_rate": 3.2285929879321404e-05, "loss": 0.6990295648574829, "step": 7004 }, { "epoch": 8.595092024539877, "grad_norm": 0.275217741727829, "learning_rate": 3.228112845438672e-05, "loss": 0.49181902408599854, "step": 7005 }, { "epoch": 8.596319018404907, "grad_norm": 0.3113466203212738, "learning_rate": 3.227632673597052e-05, "loss": 0.7207510471343994, "step": 7006 }, { "epoch": 8.59754601226994, "grad_norm": 0.26954951882362366, "learning_rate": 3.2271524724266347e-05, "loss": 0.5900956392288208, "step": 7007 }, { "epoch": 8.59877300613497, "grad_norm": 0.25499337911605835, "learning_rate": 3.226672241946775e-05, "loss": 0.8146379590034485, "step": 7008 }, { "epoch": 8.6, "grad_norm": 0.2361486405134201, "learning_rate": 3.2261919821768294e-05, "loss": 0.7237736582756042, "step": 7009 }, { "epoch": 8.60122699386503, "grad_norm": 0.26507505774497986, "learning_rate": 3.225711693136156e-05, "loss": 0.766194224357605, "step": 7010 }, { "epoch": 8.602453987730062, "grad_norm": 0.3631123900413513, "learning_rate": 3.225231374844114e-05, "loss": 0.6944797039031982, "step": 7011 }, { "epoch": 8.603680981595092, "grad_norm": 0.2782869040966034, "learning_rate": 3.2247510273200645e-05, "loss": 0.5270233154296875, "step": 7012 }, { "epoch": 8.604907975460122, "grad_norm": 0.2919541597366333, "learning_rate": 3.224270650583368e-05, "loss": 0.7909032106399536, "step": 7013 }, { "epoch": 8.606134969325154, "grad_norm": 0.3084772527217865, "learning_rate": 3.2237902446533866e-05, "loss": 0.7201387882232666, "step": 7014 }, { "epoch": 8.607361963190185, "grad_norm": 0.25793367624282837, "learning_rate": 3.223309809549485e-05, "loss": 0.5093235969543457, "step": 7015 }, { "epoch": 8.608588957055215, "grad_norm": 0.2588856518268585, "learning_rate": 3.222829345291028e-05, "loss": 0.7632476091384888, "step": 7016 }, { "epoch": 8.609815950920245, "grad_norm": 0.26544544100761414, "learning_rate": 3.2223488518973815e-05, "loss": 0.7225077748298645, "step": 7017 }, { "epoch": 8.611042944785275, "grad_norm": 0.23297129571437836, "learning_rate": 3.2218683293879135e-05, "loss": 0.745956301689148, "step": 7018 }, { "epoch": 8.612269938650307, "grad_norm": 0.27238038182258606, "learning_rate": 3.221387777781992e-05, "loss": 0.8468181490898132, "step": 7019 }, { "epoch": 8.613496932515337, "grad_norm": 0.22340688109397888, "learning_rate": 3.220907197098986e-05, "loss": 0.8797694444656372, "step": 7020 }, { "epoch": 8.614723926380368, "grad_norm": 0.2641073763370514, "learning_rate": 3.220426587358267e-05, "loss": 0.5171293020248413, "step": 7021 }, { "epoch": 8.6159509202454, "grad_norm": 0.24013440310955048, "learning_rate": 3.2199459485792074e-05, "loss": 0.8419620394706726, "step": 7022 }, { "epoch": 8.61717791411043, "grad_norm": 0.27256789803504944, "learning_rate": 3.2194652807811806e-05, "loss": 0.6061680316925049, "step": 7023 }, { "epoch": 8.61840490797546, "grad_norm": 0.28809913992881775, "learning_rate": 3.218984583983559e-05, "loss": 0.6350823640823364, "step": 7024 }, { "epoch": 8.61963190184049, "grad_norm": 0.2914152145385742, "learning_rate": 3.2185038582057204e-05, "loss": 0.5936622619628906, "step": 7025 }, { "epoch": 8.62085889570552, "grad_norm": 0.289506733417511, "learning_rate": 3.21802310346704e-05, "loss": 0.7899729013442993, "step": 7026 }, { "epoch": 8.622085889570553, "grad_norm": 0.25545087456703186, "learning_rate": 3.2175423197868956e-05, "loss": 0.6885992288589478, "step": 7027 }, { "epoch": 8.623312883435583, "grad_norm": 0.25259172916412354, "learning_rate": 3.217061507184668e-05, "loss": 0.7688367962837219, "step": 7028 }, { "epoch": 8.624539877300613, "grad_norm": 0.25663286447525024, "learning_rate": 3.216580665679735e-05, "loss": 0.7908075451850891, "step": 7029 }, { "epoch": 8.625766871165645, "grad_norm": 0.24112564325332642, "learning_rate": 3.21609979529148e-05, "loss": 0.54743492603302, "step": 7030 }, { "epoch": 8.626993865030675, "grad_norm": 0.3185423016548157, "learning_rate": 3.215618896039284e-05, "loss": 0.5630435943603516, "step": 7031 }, { "epoch": 8.628220858895705, "grad_norm": 0.3377477824687958, "learning_rate": 3.215137967942532e-05, "loss": 0.5385518074035645, "step": 7032 }, { "epoch": 8.629447852760736, "grad_norm": 0.29712024331092834, "learning_rate": 3.2146570110206074e-05, "loss": 0.6852816939353943, "step": 7033 }, { "epoch": 8.630674846625768, "grad_norm": 0.33147627115249634, "learning_rate": 3.214176025292897e-05, "loss": 0.5660465955734253, "step": 7034 }, { "epoch": 8.631901840490798, "grad_norm": 0.345645546913147, "learning_rate": 3.2136950107787894e-05, "loss": 0.42847609519958496, "step": 7035 }, { "epoch": 8.633128834355828, "grad_norm": 0.27861273288726807, "learning_rate": 3.21321396749767e-05, "loss": 0.7361289858818054, "step": 7036 }, { "epoch": 8.634355828220858, "grad_norm": 0.32396242022514343, "learning_rate": 3.2127328954689307e-05, "loss": 0.8834144473075867, "step": 7037 }, { "epoch": 8.63558282208589, "grad_norm": 0.29150915145874023, "learning_rate": 3.2122517947119596e-05, "loss": 0.6073637008666992, "step": 7038 }, { "epoch": 8.63680981595092, "grad_norm": 0.2922620177268982, "learning_rate": 3.211770665246151e-05, "loss": 0.6316481828689575, "step": 7039 }, { "epoch": 8.63803680981595, "grad_norm": 0.2764432430267334, "learning_rate": 3.2112895070908965e-05, "loss": 0.6000758409500122, "step": 7040 }, { "epoch": 8.639263803680981, "grad_norm": 0.24040107429027557, "learning_rate": 3.210808320265591e-05, "loss": 0.8446850776672363, "step": 7041 }, { "epoch": 8.640490797546013, "grad_norm": 0.2535455524921417, "learning_rate": 3.21032710478963e-05, "loss": 0.6619484424591064, "step": 7042 }, { "epoch": 8.641717791411043, "grad_norm": 0.3208259046077728, "learning_rate": 3.209845860682408e-05, "loss": 0.7221158742904663, "step": 7043 }, { "epoch": 8.642944785276073, "grad_norm": 0.2864595651626587, "learning_rate": 3.209364587963326e-05, "loss": 0.6087483763694763, "step": 7044 }, { "epoch": 8.644171779141104, "grad_norm": 0.29824721813201904, "learning_rate": 3.208883286651778e-05, "loss": 0.6266040802001953, "step": 7045 }, { "epoch": 8.645398773006136, "grad_norm": 0.2940498888492584, "learning_rate": 3.208401956767168e-05, "loss": 0.6596096754074097, "step": 7046 }, { "epoch": 8.646625766871166, "grad_norm": 0.24441702663898468, "learning_rate": 3.2079205983288954e-05, "loss": 0.7532956600189209, "step": 7047 }, { "epoch": 8.647852760736196, "grad_norm": 0.31674572825431824, "learning_rate": 3.207439211356363e-05, "loss": 0.49774760007858276, "step": 7048 }, { "epoch": 8.649079754601226, "grad_norm": 0.26739010214805603, "learning_rate": 3.2069577958689724e-05, "loss": 0.7333388328552246, "step": 7049 }, { "epoch": 8.650306748466258, "grad_norm": 0.2906285524368286, "learning_rate": 3.2064763518861305e-05, "loss": 0.7111189961433411, "step": 7050 }, { "epoch": 8.651533742331289, "grad_norm": 0.3018507659435272, "learning_rate": 3.205994879427241e-05, "loss": 0.6509793400764465, "step": 7051 }, { "epoch": 8.652760736196319, "grad_norm": 0.24523931741714478, "learning_rate": 3.205513378511712e-05, "loss": 0.7678160667419434, "step": 7052 }, { "epoch": 8.653987730061349, "grad_norm": 0.2364197075366974, "learning_rate": 3.2050318491589506e-05, "loss": 0.7197206020355225, "step": 7053 }, { "epoch": 8.655214723926381, "grad_norm": 0.22535140812397003, "learning_rate": 3.204550291388366e-05, "loss": 0.7983264923095703, "step": 7054 }, { "epoch": 8.656441717791411, "grad_norm": 0.2974867522716522, "learning_rate": 3.204068705219369e-05, "loss": 0.7883186340332031, "step": 7055 }, { "epoch": 8.657668711656441, "grad_norm": 0.21358416974544525, "learning_rate": 3.203587090671371e-05, "loss": 0.8660653829574585, "step": 7056 }, { "epoch": 8.658895705521472, "grad_norm": 0.3024669289588928, "learning_rate": 3.203105447763783e-05, "loss": 0.5535157918930054, "step": 7057 }, { "epoch": 8.660122699386504, "grad_norm": 0.3007016181945801, "learning_rate": 3.20262377651602e-05, "loss": 0.5342319011688232, "step": 7058 }, { "epoch": 8.661349693251534, "grad_norm": 0.2792603075504303, "learning_rate": 3.202142076947498e-05, "loss": 0.7212613224983215, "step": 7059 }, { "epoch": 8.662576687116564, "grad_norm": 0.3607179820537567, "learning_rate": 3.20166034907763e-05, "loss": 0.5918400883674622, "step": 7060 }, { "epoch": 8.663803680981594, "grad_norm": 0.34924426674842834, "learning_rate": 3.201178592925834e-05, "loss": 0.7070909738540649, "step": 7061 }, { "epoch": 8.665030674846626, "grad_norm": 0.26655662059783936, "learning_rate": 3.20069680851153e-05, "loss": 0.7467412948608398, "step": 7062 }, { "epoch": 8.666257668711657, "grad_norm": 0.2737058103084564, "learning_rate": 3.200214995854135e-05, "loss": 0.5304536819458008, "step": 7063 }, { "epoch": 8.667484662576687, "grad_norm": 0.2261277288198471, "learning_rate": 3.1997331549730716e-05, "loss": 0.8169074058532715, "step": 7064 }, { "epoch": 8.668711656441717, "grad_norm": 0.2202039211988449, "learning_rate": 3.19925128588776e-05, "loss": 0.8095483779907227, "step": 7065 }, { "epoch": 8.669938650306749, "grad_norm": 0.28171491622924805, "learning_rate": 3.198769388617624e-05, "loss": 0.6162018775939941, "step": 7066 }, { "epoch": 8.67116564417178, "grad_norm": 0.29711949825286865, "learning_rate": 3.198287463182086e-05, "loss": 0.6469006538391113, "step": 7067 }, { "epoch": 8.67239263803681, "grad_norm": 0.2925715744495392, "learning_rate": 3.197805509600572e-05, "loss": 0.6685038805007935, "step": 7068 }, { "epoch": 8.67361963190184, "grad_norm": 0.2486102432012558, "learning_rate": 3.1973235278925086e-05, "loss": 0.6982793807983398, "step": 7069 }, { "epoch": 8.674846625766872, "grad_norm": 0.29022452235221863, "learning_rate": 3.196841518077322e-05, "loss": 0.4179355502128601, "step": 7070 }, { "epoch": 8.676073619631902, "grad_norm": 0.2666460871696472, "learning_rate": 3.196359480174442e-05, "loss": 0.7878845930099487, "step": 7071 }, { "epoch": 8.677300613496932, "grad_norm": 0.2995654046535492, "learning_rate": 3.195877414203296e-05, "loss": 0.7517846822738647, "step": 7072 }, { "epoch": 8.678527607361962, "grad_norm": 0.42757105827331543, "learning_rate": 3.1953953201833176e-05, "loss": 0.5946692228317261, "step": 7073 }, { "epoch": 8.679754601226994, "grad_norm": 0.23676055669784546, "learning_rate": 3.1949131981339366e-05, "loss": 0.723479151725769, "step": 7074 }, { "epoch": 8.680981595092025, "grad_norm": 0.3822033107280731, "learning_rate": 3.194431048074587e-05, "loss": 0.6696454286575317, "step": 7075 }, { "epoch": 8.682208588957055, "grad_norm": 0.24252213537693024, "learning_rate": 3.1939488700247025e-05, "loss": 0.7519179582595825, "step": 7076 }, { "epoch": 8.683435582822085, "grad_norm": 0.2931329607963562, "learning_rate": 3.193466664003717e-05, "loss": 0.5456866025924683, "step": 7077 }, { "epoch": 8.684662576687117, "grad_norm": 0.24010054767131805, "learning_rate": 3.1929844300310694e-05, "loss": 0.6682682037353516, "step": 7078 }, { "epoch": 8.685889570552147, "grad_norm": 0.2831179201602936, "learning_rate": 3.192502168126196e-05, "loss": 0.9477849006652832, "step": 7079 }, { "epoch": 8.687116564417177, "grad_norm": 0.23035219311714172, "learning_rate": 3.192019878308534e-05, "loss": 0.7768336534500122, "step": 7080 }, { "epoch": 8.68834355828221, "grad_norm": 0.33065301179885864, "learning_rate": 3.191537560597526e-05, "loss": 0.482877254486084, "step": 7081 }, { "epoch": 8.68957055214724, "grad_norm": 0.3124890923500061, "learning_rate": 3.19105521501261e-05, "loss": 0.8021960258483887, "step": 7082 }, { "epoch": 8.69079754601227, "grad_norm": 0.3471950590610504, "learning_rate": 3.1905728415732293e-05, "loss": 0.5712841749191284, "step": 7083 }, { "epoch": 8.6920245398773, "grad_norm": 0.37110668420791626, "learning_rate": 3.1900904402988273e-05, "loss": 0.5651617646217346, "step": 7084 }, { "epoch": 8.69325153374233, "grad_norm": 0.2670132517814636, "learning_rate": 3.189608011208848e-05, "loss": 0.635303258895874, "step": 7085 }, { "epoch": 8.694478527607362, "grad_norm": 0.2371019572019577, "learning_rate": 3.189125554322736e-05, "loss": 0.7468119859695435, "step": 7086 }, { "epoch": 8.695705521472393, "grad_norm": 0.2628888785839081, "learning_rate": 3.188643069659939e-05, "loss": 0.6393224000930786, "step": 7087 }, { "epoch": 8.696932515337423, "grad_norm": 0.27353546023368835, "learning_rate": 3.188160557239904e-05, "loss": 0.6394062042236328, "step": 7088 }, { "epoch": 8.698159509202455, "grad_norm": 0.33746328949928284, "learning_rate": 3.1876780170820794e-05, "loss": 0.5921176671981812, "step": 7089 }, { "epoch": 8.699386503067485, "grad_norm": 0.26060837507247925, "learning_rate": 3.1871954492059156e-05, "loss": 0.7313392162322998, "step": 7090 }, { "epoch": 8.700613496932515, "grad_norm": 0.23094607889652252, "learning_rate": 3.186712853630862e-05, "loss": 0.7492491602897644, "step": 7091 }, { "epoch": 8.701840490797546, "grad_norm": 0.25629276037216187, "learning_rate": 3.186230230376372e-05, "loss": 0.7560296058654785, "step": 7092 }, { "epoch": 8.703067484662578, "grad_norm": 0.30920401215553284, "learning_rate": 3.1857475794619e-05, "loss": 0.6407120227813721, "step": 7093 }, { "epoch": 8.704294478527608, "grad_norm": 0.254942923784256, "learning_rate": 3.185264900906897e-05, "loss": 0.7781765460968018, "step": 7094 }, { "epoch": 8.705521472392638, "grad_norm": 0.30199503898620605, "learning_rate": 3.184782194730822e-05, "loss": 0.5342777967453003, "step": 7095 }, { "epoch": 8.706748466257668, "grad_norm": 0.2823483347892761, "learning_rate": 3.1842994609531296e-05, "loss": 0.7316378951072693, "step": 7096 }, { "epoch": 8.7079754601227, "grad_norm": 0.29659852385520935, "learning_rate": 3.183816699593277e-05, "loss": 0.6920750737190247, "step": 7097 }, { "epoch": 8.70920245398773, "grad_norm": 0.2131638377904892, "learning_rate": 3.183333910670724e-05, "loss": 0.7669156789779663, "step": 7098 }, { "epoch": 8.71042944785276, "grad_norm": 0.26288318634033203, "learning_rate": 3.18285109420493e-05, "loss": 0.74807208776474, "step": 7099 }, { "epoch": 8.71165644171779, "grad_norm": 0.2374694049358368, "learning_rate": 3.182368250215355e-05, "loss": 0.7371885776519775, "step": 7100 }, { "epoch": 8.712883435582823, "grad_norm": 0.2678435742855072, "learning_rate": 3.181885378721463e-05, "loss": 0.6039866209030151, "step": 7101 }, { "epoch": 8.714110429447853, "grad_norm": 0.30969274044036865, "learning_rate": 3.1814024797427164e-05, "loss": 0.7508230805397034, "step": 7102 }, { "epoch": 8.715337423312883, "grad_norm": 0.32528191804885864, "learning_rate": 3.180919553298578e-05, "loss": 0.5969101190567017, "step": 7103 }, { "epoch": 8.716564417177914, "grad_norm": 0.379505455493927, "learning_rate": 3.180436599408515e-05, "loss": 0.5876503586769104, "step": 7104 }, { "epoch": 8.717791411042946, "grad_norm": 0.42393389344215393, "learning_rate": 3.1799536180919933e-05, "loss": 0.5219439268112183, "step": 7105 }, { "epoch": 8.719018404907976, "grad_norm": 0.27675503492355347, "learning_rate": 3.1794706093684804e-05, "loss": 0.8339205980300903, "step": 7106 }, { "epoch": 8.720245398773006, "grad_norm": 0.3111865222454071, "learning_rate": 3.1789875732574466e-05, "loss": 0.7877544164657593, "step": 7107 }, { "epoch": 8.721472392638036, "grad_norm": 0.2858370542526245, "learning_rate": 3.178504509778358e-05, "loss": 0.5888288021087646, "step": 7108 }, { "epoch": 8.722699386503068, "grad_norm": 0.2830832004547119, "learning_rate": 3.1780214189506894e-05, "loss": 0.7597619295120239, "step": 7109 }, { "epoch": 8.723926380368098, "grad_norm": 0.2224857062101364, "learning_rate": 3.1775383007939105e-05, "loss": 0.5366001725196838, "step": 7110 }, { "epoch": 8.725153374233129, "grad_norm": 0.37169572710990906, "learning_rate": 3.177055155327496e-05, "loss": 0.5608725547790527, "step": 7111 }, { "epoch": 8.726380368098159, "grad_norm": 0.2980658710002899, "learning_rate": 3.176571982570918e-05, "loss": 0.6298316717147827, "step": 7112 }, { "epoch": 8.72760736196319, "grad_norm": 0.39273110032081604, "learning_rate": 3.1760887825436535e-05, "loss": 0.5356027483940125, "step": 7113 }, { "epoch": 8.728834355828221, "grad_norm": 0.24094124138355255, "learning_rate": 3.1756055552651786e-05, "loss": 0.7130757570266724, "step": 7114 }, { "epoch": 8.730061349693251, "grad_norm": 0.256777286529541, "learning_rate": 3.175122300754971e-05, "loss": 0.8565912246704102, "step": 7115 }, { "epoch": 8.731288343558282, "grad_norm": 0.22572124004364014, "learning_rate": 3.1746390190325075e-05, "loss": 0.7298089265823364, "step": 7116 }, { "epoch": 8.732515337423314, "grad_norm": 0.26524510979652405, "learning_rate": 3.174155710117271e-05, "loss": 0.5650326609611511, "step": 7117 }, { "epoch": 8.733742331288344, "grad_norm": 0.2878963053226471, "learning_rate": 3.1736723740287396e-05, "loss": 0.608792781829834, "step": 7118 }, { "epoch": 8.734969325153374, "grad_norm": 0.24034669995307922, "learning_rate": 3.1731890107863974e-05, "loss": 0.5466711521148682, "step": 7119 }, { "epoch": 8.736196319018404, "grad_norm": 0.28748103976249695, "learning_rate": 3.172705620409725e-05, "loss": 0.7767056822776794, "step": 7120 }, { "epoch": 8.737423312883436, "grad_norm": 0.286166250705719, "learning_rate": 3.172222202918207e-05, "loss": 0.6573950052261353, "step": 7121 }, { "epoch": 8.738650306748466, "grad_norm": 0.2769182622432709, "learning_rate": 3.1717387583313305e-05, "loss": 0.7950841188430786, "step": 7122 }, { "epoch": 8.739877300613497, "grad_norm": 0.3664399981498718, "learning_rate": 3.17125528666858e-05, "loss": 0.45789051055908203, "step": 7123 }, { "epoch": 8.741104294478527, "grad_norm": 0.23216037452220917, "learning_rate": 3.1707717879494436e-05, "loss": 0.80743008852005, "step": 7124 }, { "epoch": 8.742331288343559, "grad_norm": 0.26370444893836975, "learning_rate": 3.170288262193409e-05, "loss": 0.6763865947723389, "step": 7125 }, { "epoch": 8.743558282208589, "grad_norm": 0.23958635330200195, "learning_rate": 3.169804709419967e-05, "loss": 0.6829057931900024, "step": 7126 }, { "epoch": 8.74478527607362, "grad_norm": 0.27786558866500854, "learning_rate": 3.169321129648607e-05, "loss": 0.5060064792633057, "step": 7127 }, { "epoch": 8.74601226993865, "grad_norm": 0.30367499589920044, "learning_rate": 3.1688375228988206e-05, "loss": 0.6986193656921387, "step": 7128 }, { "epoch": 8.747239263803682, "grad_norm": 0.21720905601978302, "learning_rate": 3.1683538891901034e-05, "loss": 0.6928238868713379, "step": 7129 }, { "epoch": 8.748466257668712, "grad_norm": 0.29531964659690857, "learning_rate": 3.1678702285419445e-05, "loss": 0.7382892370223999, "step": 7130 }, { "epoch": 8.749693251533742, "grad_norm": 0.2912874221801758, "learning_rate": 3.167386540973843e-05, "loss": 0.6857497692108154, "step": 7131 }, { "epoch": 8.750920245398772, "grad_norm": 0.39172229170799255, "learning_rate": 3.1669028265052934e-05, "loss": 0.4331640601158142, "step": 7132 }, { "epoch": 8.752147239263804, "grad_norm": 0.29546919465065, "learning_rate": 3.1664190851557926e-05, "loss": 0.7721864581108093, "step": 7133 }, { "epoch": 8.753374233128834, "grad_norm": 0.33804306387901306, "learning_rate": 3.16593531694484e-05, "loss": 0.6014484763145447, "step": 7134 }, { "epoch": 8.754601226993865, "grad_norm": 0.25643298029899597, "learning_rate": 3.165451521891933e-05, "loss": 0.7087319493293762, "step": 7135 }, { "epoch": 8.755828220858895, "grad_norm": 0.29900115728378296, "learning_rate": 3.164967700016575e-05, "loss": 0.5535248517990112, "step": 7136 }, { "epoch": 8.757055214723927, "grad_norm": 0.26936161518096924, "learning_rate": 3.1644838513382634e-05, "loss": 0.7412031888961792, "step": 7137 }, { "epoch": 8.758282208588957, "grad_norm": 0.33714011311531067, "learning_rate": 3.163999975876503e-05, "loss": 0.5664148330688477, "step": 7138 }, { "epoch": 8.759509202453987, "grad_norm": 0.26997828483581543, "learning_rate": 3.163516073650798e-05, "loss": 0.6847031712532043, "step": 7139 }, { "epoch": 8.76073619631902, "grad_norm": 0.250271201133728, "learning_rate": 3.1630321446806524e-05, "loss": 0.7970142364501953, "step": 7140 }, { "epoch": 8.76196319018405, "grad_norm": 0.30378520488739014, "learning_rate": 3.1625481889855727e-05, "loss": 0.6846522092819214, "step": 7141 }, { "epoch": 8.76319018404908, "grad_norm": 0.3006986081600189, "learning_rate": 3.162064206585064e-05, "loss": 0.5900717377662659, "step": 7142 }, { "epoch": 8.76441717791411, "grad_norm": 0.6109442114830017, "learning_rate": 3.161580197498637e-05, "loss": 0.7344793081283569, "step": 7143 }, { "epoch": 8.76564417177914, "grad_norm": 0.31701526045799255, "learning_rate": 3.1610961617457975e-05, "loss": 0.6015625596046448, "step": 7144 }, { "epoch": 8.766871165644172, "grad_norm": 0.2497977912425995, "learning_rate": 3.160612099346057e-05, "loss": 0.7574054002761841, "step": 7145 }, { "epoch": 8.768098159509202, "grad_norm": 0.32035303115844727, "learning_rate": 3.1601280103189284e-05, "loss": 0.5619712471961975, "step": 7146 }, { "epoch": 8.769325153374233, "grad_norm": 0.34491997957229614, "learning_rate": 3.159643894683922e-05, "loss": 0.4663776457309723, "step": 7147 }, { "epoch": 8.770552147239265, "grad_norm": 0.29584306478500366, "learning_rate": 3.1591597524605515e-05, "loss": 0.6873115301132202, "step": 7148 }, { "epoch": 8.771779141104295, "grad_norm": 0.23061777651309967, "learning_rate": 3.15867558366833e-05, "loss": 0.5799086689949036, "step": 7149 }, { "epoch": 8.773006134969325, "grad_norm": 0.2935281991958618, "learning_rate": 3.158191388326776e-05, "loss": 0.6148784756660461, "step": 7150 }, { "epoch": 8.774233128834355, "grad_norm": 0.23161602020263672, "learning_rate": 3.157707166455404e-05, "loss": 0.7272052764892578, "step": 7151 }, { "epoch": 8.775460122699386, "grad_norm": 0.2943994700908661, "learning_rate": 3.157222918073732e-05, "loss": 0.5820614695549011, "step": 7152 }, { "epoch": 8.776687116564418, "grad_norm": 0.3047887980937958, "learning_rate": 3.1567386432012784e-05, "loss": 0.5494312644004822, "step": 7153 }, { "epoch": 8.777914110429448, "grad_norm": 0.34997811913490295, "learning_rate": 3.156254341857563e-05, "loss": 0.7679262161254883, "step": 7154 }, { "epoch": 8.779141104294478, "grad_norm": 0.30701231956481934, "learning_rate": 3.1557700140621074e-05, "loss": 0.5744431018829346, "step": 7155 }, { "epoch": 8.78036809815951, "grad_norm": 0.34430378675460815, "learning_rate": 3.155285659834432e-05, "loss": 0.5381165742874146, "step": 7156 }, { "epoch": 8.78159509202454, "grad_norm": 0.24890942871570587, "learning_rate": 3.154801279194061e-05, "loss": 0.7753810882568359, "step": 7157 }, { "epoch": 8.78282208588957, "grad_norm": 0.27274370193481445, "learning_rate": 3.154316872160519e-05, "loss": 0.6741266250610352, "step": 7158 }, { "epoch": 8.7840490797546, "grad_norm": 0.32597246766090393, "learning_rate": 3.153832438753328e-05, "loss": 0.6730363368988037, "step": 7159 }, { "epoch": 8.785276073619633, "grad_norm": 0.26809531450271606, "learning_rate": 3.153347978992019e-05, "loss": 0.5784745216369629, "step": 7160 }, { "epoch": 8.786503067484663, "grad_norm": 0.31964272260665894, "learning_rate": 3.152863492896113e-05, "loss": 0.746888279914856, "step": 7161 }, { "epoch": 8.787730061349693, "grad_norm": 0.22893008589744568, "learning_rate": 3.152378980485143e-05, "loss": 0.5496758222579956, "step": 7162 }, { "epoch": 8.788957055214723, "grad_norm": 0.26699915528297424, "learning_rate": 3.151894441778638e-05, "loss": 0.6990241408348083, "step": 7163 }, { "epoch": 8.790184049079755, "grad_norm": 0.2547342777252197, "learning_rate": 3.151409876796126e-05, "loss": 0.753642201423645, "step": 7164 }, { "epoch": 8.791411042944786, "grad_norm": 0.25802236795425415, "learning_rate": 3.150925285557141e-05, "loss": 0.8034678101539612, "step": 7165 }, { "epoch": 8.792638036809816, "grad_norm": 0.23619547486305237, "learning_rate": 3.150440668081213e-05, "loss": 0.7981911897659302, "step": 7166 }, { "epoch": 8.793865030674846, "grad_norm": 0.28081321716308594, "learning_rate": 3.1499560243878766e-05, "loss": 0.6300210356712341, "step": 7167 }, { "epoch": 8.795092024539878, "grad_norm": 0.1867445856332779, "learning_rate": 3.1494713544966684e-05, "loss": 0.8800846338272095, "step": 7168 }, { "epoch": 8.796319018404908, "grad_norm": 0.2709420323371887, "learning_rate": 3.14898665842712e-05, "loss": 0.7429839372634888, "step": 7169 }, { "epoch": 8.797546012269938, "grad_norm": 0.21462129056453705, "learning_rate": 3.148501936198772e-05, "loss": 0.8323190808296204, "step": 7170 }, { "epoch": 8.798773006134969, "grad_norm": 0.2313939929008484, "learning_rate": 3.14801718783116e-05, "loss": 0.6926698088645935, "step": 7171 }, { "epoch": 8.8, "grad_norm": 0.3082175850868225, "learning_rate": 3.1475324133438236e-05, "loss": 0.6177433133125305, "step": 7172 }, { "epoch": 8.801226993865031, "grad_norm": 0.28537777066230774, "learning_rate": 3.147047612756302e-05, "loss": 0.8549178838729858, "step": 7173 }, { "epoch": 8.802453987730061, "grad_norm": 0.28986525535583496, "learning_rate": 3.1465627860881366e-05, "loss": 0.634994626045227, "step": 7174 }, { "epoch": 8.803680981595091, "grad_norm": 0.2674698829650879, "learning_rate": 3.14607793335887e-05, "loss": 0.7971832156181335, "step": 7175 }, { "epoch": 8.804907975460123, "grad_norm": 0.23343613743782043, "learning_rate": 3.145593054588044e-05, "loss": 0.6104985475540161, "step": 7176 }, { "epoch": 8.806134969325154, "grad_norm": 0.2793191969394684, "learning_rate": 3.145108149795203e-05, "loss": 0.6548206210136414, "step": 7177 }, { "epoch": 8.807361963190184, "grad_norm": 0.324802041053772, "learning_rate": 3.1446232189998934e-05, "loss": 0.644484281539917, "step": 7178 }, { "epoch": 8.808588957055214, "grad_norm": 0.2667120695114136, "learning_rate": 3.144138262221659e-05, "loss": 0.6187687516212463, "step": 7179 }, { "epoch": 8.809815950920246, "grad_norm": 0.32201141119003296, "learning_rate": 3.14365327948005e-05, "loss": 0.842058002948761, "step": 7180 }, { "epoch": 8.811042944785276, "grad_norm": 0.26629766821861267, "learning_rate": 3.143168270794612e-05, "loss": 0.6359169483184814, "step": 7181 }, { "epoch": 8.812269938650306, "grad_norm": 0.31563448905944824, "learning_rate": 3.1426832361848954e-05, "loss": 0.7421669363975525, "step": 7182 }, { "epoch": 8.813496932515337, "grad_norm": 0.23253953456878662, "learning_rate": 3.1421981756704506e-05, "loss": 0.9279754757881165, "step": 7183 }, { "epoch": 8.814723926380369, "grad_norm": 0.2736014425754547, "learning_rate": 3.1417130892708294e-05, "loss": 0.7259814739227295, "step": 7184 }, { "epoch": 8.815950920245399, "grad_norm": 0.2984899580478668, "learning_rate": 3.141227977005583e-05, "loss": 0.6971849203109741, "step": 7185 }, { "epoch": 8.81717791411043, "grad_norm": 0.4720645248889923, "learning_rate": 3.140742838894266e-05, "loss": 0.7062433958053589, "step": 7186 }, { "epoch": 8.81840490797546, "grad_norm": 0.23507748544216156, "learning_rate": 3.140257674956433e-05, "loss": 0.7216124534606934, "step": 7187 }, { "epoch": 8.819631901840491, "grad_norm": 0.2591291666030884, "learning_rate": 3.139772485211639e-05, "loss": 0.8736591339111328, "step": 7188 }, { "epoch": 8.820858895705522, "grad_norm": 0.34285011887550354, "learning_rate": 3.139287269679441e-05, "loss": 0.5319074988365173, "step": 7189 }, { "epoch": 8.822085889570552, "grad_norm": 0.3366604745388031, "learning_rate": 3.1388020283793964e-05, "loss": 0.6067326068878174, "step": 7190 }, { "epoch": 8.823312883435582, "grad_norm": 0.25292450189590454, "learning_rate": 3.1383167613310636e-05, "loss": 0.732252299785614, "step": 7191 }, { "epoch": 8.824539877300614, "grad_norm": 0.24510478973388672, "learning_rate": 3.1378314685540026e-05, "loss": 0.7344874739646912, "step": 7192 }, { "epoch": 8.825766871165644, "grad_norm": 0.2796882092952728, "learning_rate": 3.137346150067775e-05, "loss": 0.6359445452690125, "step": 7193 }, { "epoch": 8.826993865030675, "grad_norm": 0.26720985770225525, "learning_rate": 3.136860805891941e-05, "loss": 0.6532543301582336, "step": 7194 }, { "epoch": 8.828220858895705, "grad_norm": 1.8333325386047363, "learning_rate": 3.136375436046064e-05, "loss": 0.6128031015396118, "step": 7195 }, { "epoch": 8.829447852760737, "grad_norm": 0.3080931305885315, "learning_rate": 3.13589004054971e-05, "loss": 0.7341431379318237, "step": 7196 }, { "epoch": 8.830674846625767, "grad_norm": 0.33216243982315063, "learning_rate": 3.13540461942244e-05, "loss": 0.8169431686401367, "step": 7197 }, { "epoch": 8.831901840490797, "grad_norm": 0.24650847911834717, "learning_rate": 3.134919172683823e-05, "loss": 0.6527185440063477, "step": 7198 }, { "epoch": 8.833128834355827, "grad_norm": 0.30478814244270325, "learning_rate": 3.1344337003534254e-05, "loss": 0.5396736860275269, "step": 7199 }, { "epoch": 8.83435582822086, "grad_norm": 0.2543686628341675, "learning_rate": 3.133948202450814e-05, "loss": 0.7080013155937195, "step": 7200 }, { "epoch": 8.83558282208589, "grad_norm": 0.2672598361968994, "learning_rate": 3.133462678995559e-05, "loss": 0.8028708696365356, "step": 7201 }, { "epoch": 8.83680981595092, "grad_norm": 0.23414944112300873, "learning_rate": 3.1329771300072304e-05, "loss": 0.9278010129928589, "step": 7202 }, { "epoch": 8.83803680981595, "grad_norm": 0.26830464601516724, "learning_rate": 3.132491555505398e-05, "loss": 0.7047871351242065, "step": 7203 }, { "epoch": 8.839263803680982, "grad_norm": 0.2678429186344147, "learning_rate": 3.132005955509636e-05, "loss": 0.7420195937156677, "step": 7204 }, { "epoch": 8.840490797546012, "grad_norm": 0.28478166460990906, "learning_rate": 3.1315203300395155e-05, "loss": 0.7316564321517944, "step": 7205 }, { "epoch": 8.841717791411043, "grad_norm": 0.2773301899433136, "learning_rate": 3.131034679114613e-05, "loss": 0.6276045441627502, "step": 7206 }, { "epoch": 8.842944785276075, "grad_norm": 0.4300035536289215, "learning_rate": 3.130549002754501e-05, "loss": 0.821557343006134, "step": 7207 }, { "epoch": 8.844171779141105, "grad_norm": 0.23269660770893097, "learning_rate": 3.130063300978758e-05, "loss": 0.8409124612808228, "step": 7208 }, { "epoch": 8.845398773006135, "grad_norm": 0.2951713502407074, "learning_rate": 3.1295775738069597e-05, "loss": 0.832256019115448, "step": 7209 }, { "epoch": 8.846625766871165, "grad_norm": 0.22543257474899292, "learning_rate": 3.129091821258685e-05, "loss": 0.6846653819084167, "step": 7210 }, { "epoch": 8.847852760736195, "grad_norm": 0.3099351227283478, "learning_rate": 3.128606043353514e-05, "loss": 0.7068372964859009, "step": 7211 }, { "epoch": 8.849079754601227, "grad_norm": 0.29917800426483154, "learning_rate": 3.128120240111026e-05, "loss": 0.734855055809021, "step": 7212 }, { "epoch": 8.850306748466258, "grad_norm": 0.24123762547969818, "learning_rate": 3.1276344115508024e-05, "loss": 0.8030073642730713, "step": 7213 }, { "epoch": 8.851533742331288, "grad_norm": 0.2673616111278534, "learning_rate": 3.127148557692425e-05, "loss": 0.6483185291290283, "step": 7214 }, { "epoch": 8.85276073619632, "grad_norm": 0.2565689980983734, "learning_rate": 3.126662678555479e-05, "loss": 0.7822538614273071, "step": 7215 }, { "epoch": 8.85398773006135, "grad_norm": 0.257396399974823, "learning_rate": 3.1261767741595474e-05, "loss": 0.757263720035553, "step": 7216 }, { "epoch": 8.85521472392638, "grad_norm": 0.262928307056427, "learning_rate": 3.125690844524215e-05, "loss": 0.8085504174232483, "step": 7217 }, { "epoch": 8.85644171779141, "grad_norm": 0.28341618180274963, "learning_rate": 3.1252048896690704e-05, "loss": 0.5982089042663574, "step": 7218 }, { "epoch": 8.857668711656443, "grad_norm": 0.3004501760005951, "learning_rate": 3.1247189096137e-05, "loss": 0.5037796497344971, "step": 7219 }, { "epoch": 8.858895705521473, "grad_norm": 0.29513704776763916, "learning_rate": 3.124232904377691e-05, "loss": 0.7016496658325195, "step": 7220 }, { "epoch": 8.860122699386503, "grad_norm": 0.295942485332489, "learning_rate": 3.123746873980635e-05, "loss": 0.6071356534957886, "step": 7221 }, { "epoch": 8.861349693251533, "grad_norm": 0.25522953271865845, "learning_rate": 3.123260818442121e-05, "loss": 0.722460150718689, "step": 7222 }, { "epoch": 8.862576687116565, "grad_norm": 0.32930952310562134, "learning_rate": 3.1227747377817414e-05, "loss": 0.49019262194633484, "step": 7223 }, { "epoch": 8.863803680981595, "grad_norm": 0.2818688154220581, "learning_rate": 3.122288632019087e-05, "loss": 0.6133722066879272, "step": 7224 }, { "epoch": 8.865030674846626, "grad_norm": 0.24390080571174622, "learning_rate": 3.121802501173754e-05, "loss": 0.7510944604873657, "step": 7225 }, { "epoch": 8.866257668711656, "grad_norm": 0.24724753201007843, "learning_rate": 3.121316345265336e-05, "loss": 0.6433278322219849, "step": 7226 }, { "epoch": 8.867484662576688, "grad_norm": 0.28706780076026917, "learning_rate": 3.120830164313426e-05, "loss": 0.4837552607059479, "step": 7227 }, { "epoch": 8.868711656441718, "grad_norm": 0.40199828147888184, "learning_rate": 3.120343958337625e-05, "loss": 0.4199999272823334, "step": 7228 }, { "epoch": 8.869938650306748, "grad_norm": 0.2628554105758667, "learning_rate": 3.119857727357527e-05, "loss": 0.6756314039230347, "step": 7229 }, { "epoch": 8.871165644171779, "grad_norm": 0.25783827900886536, "learning_rate": 3.119371471392733e-05, "loss": 0.8166959285736084, "step": 7230 }, { "epoch": 8.87239263803681, "grad_norm": 0.19324643909931183, "learning_rate": 3.118885190462841e-05, "loss": 0.6660568118095398, "step": 7231 }, { "epoch": 8.87361963190184, "grad_norm": 0.306774377822876, "learning_rate": 3.118398884587451e-05, "loss": 0.8103880882263184, "step": 7232 }, { "epoch": 8.874846625766871, "grad_norm": 0.24147440493106842, "learning_rate": 3.1179125537861676e-05, "loss": 0.8611432313919067, "step": 7233 }, { "epoch": 8.876073619631901, "grad_norm": 0.2915154993534088, "learning_rate": 3.1174261980785905e-05, "loss": 0.7466111183166504, "step": 7234 }, { "epoch": 8.877300613496933, "grad_norm": 0.2673458158969879, "learning_rate": 3.116939817484325e-05, "loss": 0.7989678978919983, "step": 7235 }, { "epoch": 8.878527607361963, "grad_norm": 0.22309467196464539, "learning_rate": 3.1164534120229736e-05, "loss": 0.7921804785728455, "step": 7236 }, { "epoch": 8.879754601226994, "grad_norm": 0.2652793228626251, "learning_rate": 3.115966981714144e-05, "loss": 0.6983808875083923, "step": 7237 }, { "epoch": 8.880981595092024, "grad_norm": 0.25357598066329956, "learning_rate": 3.1154805265774425e-05, "loss": 0.7610939741134644, "step": 7238 }, { "epoch": 8.882208588957056, "grad_norm": 0.23701773583889008, "learning_rate": 3.1149940466324754e-05, "loss": 0.7912105321884155, "step": 7239 }, { "epoch": 8.883435582822086, "grad_norm": 0.2901599109172821, "learning_rate": 3.114507541898854e-05, "loss": 0.6389310359954834, "step": 7240 }, { "epoch": 8.884662576687116, "grad_norm": 0.2969072163105011, "learning_rate": 3.1140210123961847e-05, "loss": 0.4569082260131836, "step": 7241 }, { "epoch": 8.885889570552147, "grad_norm": 0.31441307067871094, "learning_rate": 3.113534458144081e-05, "loss": 0.6263986229896545, "step": 7242 }, { "epoch": 8.887116564417179, "grad_norm": 0.2225600779056549, "learning_rate": 3.113047879162152e-05, "loss": 0.7025034427642822, "step": 7243 }, { "epoch": 8.888343558282209, "grad_norm": 0.26492777466773987, "learning_rate": 3.112561275470012e-05, "loss": 0.6216025948524475, "step": 7244 }, { "epoch": 8.889570552147239, "grad_norm": 0.27593979239463806, "learning_rate": 3.112074647087274e-05, "loss": 0.7389239072799683, "step": 7245 }, { "epoch": 8.89079754601227, "grad_norm": 0.23312027752399445, "learning_rate": 3.1115879940335524e-05, "loss": 0.7800502181053162, "step": 7246 }, { "epoch": 8.892024539877301, "grad_norm": 0.26827308535575867, "learning_rate": 3.111101316328464e-05, "loss": 0.6152259111404419, "step": 7247 }, { "epoch": 8.893251533742331, "grad_norm": 0.24800369143486023, "learning_rate": 3.110614613991623e-05, "loss": 0.8009635210037231, "step": 7248 }, { "epoch": 8.894478527607362, "grad_norm": 0.3995833694934845, "learning_rate": 3.1101278870426486e-05, "loss": 0.7916545867919922, "step": 7249 }, { "epoch": 8.895705521472392, "grad_norm": 0.3499728739261627, "learning_rate": 3.10964113550116e-05, "loss": 0.535948634147644, "step": 7250 }, { "epoch": 8.896932515337424, "grad_norm": 0.20862816274166107, "learning_rate": 3.109154359386776e-05, "loss": 0.7514641284942627, "step": 7251 }, { "epoch": 8.898159509202454, "grad_norm": 0.3004094958305359, "learning_rate": 3.1086675587191164e-05, "loss": 0.553017258644104, "step": 7252 }, { "epoch": 8.899386503067484, "grad_norm": 0.2686738669872284, "learning_rate": 3.1081807335178036e-05, "loss": 0.6358020305633545, "step": 7253 }, { "epoch": 8.900613496932515, "grad_norm": 0.25185900926589966, "learning_rate": 3.107693883802461e-05, "loss": 0.6304751038551331, "step": 7254 }, { "epoch": 8.901840490797547, "grad_norm": 0.2833058536052704, "learning_rate": 3.10720700959271e-05, "loss": 0.5846331715583801, "step": 7255 }, { "epoch": 8.903067484662577, "grad_norm": 0.2794152498245239, "learning_rate": 3.1067201109081766e-05, "loss": 0.7861287593841553, "step": 7256 }, { "epoch": 8.904294478527607, "grad_norm": 0.25348174571990967, "learning_rate": 3.106233187768487e-05, "loss": 0.7661911249160767, "step": 7257 }, { "epoch": 8.905521472392637, "grad_norm": 0.2766904830932617, "learning_rate": 3.105746240193265e-05, "loss": 0.6613242626190186, "step": 7258 }, { "epoch": 8.90674846625767, "grad_norm": 0.2854800522327423, "learning_rate": 3.105259268202141e-05, "loss": 0.5153958797454834, "step": 7259 }, { "epoch": 8.9079754601227, "grad_norm": 0.33311814069747925, "learning_rate": 3.1047722718147416e-05, "loss": 0.6943892240524292, "step": 7260 }, { "epoch": 8.90920245398773, "grad_norm": 0.29300400614738464, "learning_rate": 3.104285251050696e-05, "loss": 0.621680736541748, "step": 7261 }, { "epoch": 8.91042944785276, "grad_norm": 0.37855905294418335, "learning_rate": 3.1037982059296366e-05, "loss": 0.6967287659645081, "step": 7262 }, { "epoch": 8.911656441717792, "grad_norm": 0.29473915696144104, "learning_rate": 3.103311136471193e-05, "loss": 0.5799374580383301, "step": 7263 }, { "epoch": 8.912883435582822, "grad_norm": 0.25290340185165405, "learning_rate": 3.1028240426949984e-05, "loss": 0.7913899421691895, "step": 7264 }, { "epoch": 8.914110429447852, "grad_norm": 0.2526668310165405, "learning_rate": 3.102336924620686e-05, "loss": 0.7267440557479858, "step": 7265 }, { "epoch": 8.915337423312884, "grad_norm": 0.31023022532463074, "learning_rate": 3.1018497822678907e-05, "loss": 0.7202735543251038, "step": 7266 }, { "epoch": 8.916564417177915, "grad_norm": 0.24581851065158844, "learning_rate": 3.101362615656246e-05, "loss": 0.7640163898468018, "step": 7267 }, { "epoch": 8.917791411042945, "grad_norm": 0.28028637170791626, "learning_rate": 3.10087542480539e-05, "loss": 0.6014326810836792, "step": 7268 }, { "epoch": 8.919018404907975, "grad_norm": 0.30770155787467957, "learning_rate": 3.100388209734959e-05, "loss": 0.7943921089172363, "step": 7269 }, { "epoch": 8.920245398773005, "grad_norm": 0.26351645588874817, "learning_rate": 3.099900970464592e-05, "loss": 0.6814409494400024, "step": 7270 }, { "epoch": 8.921472392638037, "grad_norm": 0.2735639214515686, "learning_rate": 3.099413707013928e-05, "loss": 0.7176274061203003, "step": 7271 }, { "epoch": 8.922699386503067, "grad_norm": 0.29325953125953674, "learning_rate": 3.098926419402606e-05, "loss": 0.785326361656189, "step": 7272 }, { "epoch": 8.923926380368098, "grad_norm": 0.23326806724071503, "learning_rate": 3.098439107650269e-05, "loss": 0.7428972721099854, "step": 7273 }, { "epoch": 8.92515337423313, "grad_norm": 0.27619609236717224, "learning_rate": 3.0979517717765586e-05, "loss": 0.49685004353523254, "step": 7274 }, { "epoch": 8.92638036809816, "grad_norm": 0.2622320353984833, "learning_rate": 3.097464411801118e-05, "loss": 0.7632527351379395, "step": 7275 }, { "epoch": 8.92760736196319, "grad_norm": 0.2805558741092682, "learning_rate": 3.0969770277435905e-05, "loss": 0.6677194833755493, "step": 7276 }, { "epoch": 8.92883435582822, "grad_norm": 0.4231622517108917, "learning_rate": 3.096489619623622e-05, "loss": 0.3634946048259735, "step": 7277 }, { "epoch": 8.93006134969325, "grad_norm": 0.2323133945465088, "learning_rate": 3.096002187460857e-05, "loss": 0.5252811312675476, "step": 7278 }, { "epoch": 8.931288343558283, "grad_norm": 0.36213698983192444, "learning_rate": 3.0955147312749446e-05, "loss": 0.5122374296188354, "step": 7279 }, { "epoch": 8.932515337423313, "grad_norm": 0.2557947039604187, "learning_rate": 3.095027251085532e-05, "loss": 0.8338966369628906, "step": 7280 }, { "epoch": 8.933742331288343, "grad_norm": 0.33878523111343384, "learning_rate": 3.094539746912268e-05, "loss": 0.5793583393096924, "step": 7281 }, { "epoch": 8.934969325153375, "grad_norm": 0.22886399924755096, "learning_rate": 3.094052218774802e-05, "loss": 0.6050984859466553, "step": 7282 }, { "epoch": 8.936196319018405, "grad_norm": 0.2209850400686264, "learning_rate": 3.093564666692785e-05, "loss": 0.814305305480957, "step": 7283 }, { "epoch": 8.937423312883435, "grad_norm": 0.30090242624282837, "learning_rate": 3.09307709068587e-05, "loss": 0.4951132535934448, "step": 7284 }, { "epoch": 8.938650306748466, "grad_norm": 0.27873408794403076, "learning_rate": 3.0925894907737086e-05, "loss": 0.7548747658729553, "step": 7285 }, { "epoch": 8.939877300613498, "grad_norm": 0.19621464610099792, "learning_rate": 3.092101866975955e-05, "loss": 0.7900205850601196, "step": 7286 }, { "epoch": 8.941104294478528, "grad_norm": 0.273838073015213, "learning_rate": 3.091614219312264e-05, "loss": 0.6505957841873169, "step": 7287 }, { "epoch": 8.942331288343558, "grad_norm": 0.32592666149139404, "learning_rate": 3.091126547802291e-05, "loss": 0.8055509924888611, "step": 7288 }, { "epoch": 8.943558282208588, "grad_norm": 0.2669646441936493, "learning_rate": 3.0906388524656924e-05, "loss": 0.7686874866485596, "step": 7289 }, { "epoch": 8.94478527607362, "grad_norm": 0.28299999237060547, "learning_rate": 3.090151133322127e-05, "loss": 0.6043221950531006, "step": 7290 }, { "epoch": 8.94601226993865, "grad_norm": 0.27989718317985535, "learning_rate": 3.0896633903912526e-05, "loss": 0.6967713236808777, "step": 7291 }, { "epoch": 8.94723926380368, "grad_norm": 0.26754793524742126, "learning_rate": 3.089175623692728e-05, "loss": 0.6743597984313965, "step": 7292 }, { "epoch": 8.948466257668711, "grad_norm": 0.28774169087409973, "learning_rate": 3.088687833246216e-05, "loss": 0.6256071329116821, "step": 7293 }, { "epoch": 8.949693251533743, "grad_norm": 0.27943429350852966, "learning_rate": 3.088200019071374e-05, "loss": 0.7879140973091125, "step": 7294 }, { "epoch": 8.950920245398773, "grad_norm": 0.2813675105571747, "learning_rate": 3.087712181187868e-05, "loss": 0.7237752079963684, "step": 7295 }, { "epoch": 8.952147239263804, "grad_norm": 0.32066959142684937, "learning_rate": 3.08722431961536e-05, "loss": 0.6935540437698364, "step": 7296 }, { "epoch": 8.953374233128834, "grad_norm": 0.31044450402259827, "learning_rate": 3.086736434373515e-05, "loss": 0.6106235980987549, "step": 7297 }, { "epoch": 8.954601226993866, "grad_norm": 0.27282437682151794, "learning_rate": 3.086248525481998e-05, "loss": 0.5326861143112183, "step": 7298 }, { "epoch": 8.955828220858896, "grad_norm": 0.3827176094055176, "learning_rate": 3.0857605929604736e-05, "loss": 0.4695777893066406, "step": 7299 }, { "epoch": 8.957055214723926, "grad_norm": 0.23788854479789734, "learning_rate": 3.085272636828612e-05, "loss": 0.7663236856460571, "step": 7300 }, { "epoch": 8.958282208588956, "grad_norm": 0.26393961906433105, "learning_rate": 3.084784657106078e-05, "loss": 0.7955529689788818, "step": 7301 }, { "epoch": 8.959509202453988, "grad_norm": 0.24274423718452454, "learning_rate": 3.0842966538125435e-05, "loss": 0.7478587031364441, "step": 7302 }, { "epoch": 8.960736196319019, "grad_norm": 0.2523268163204193, "learning_rate": 3.083808626967678e-05, "loss": 0.6814353466033936, "step": 7303 }, { "epoch": 8.961963190184049, "grad_norm": 0.30471470952033997, "learning_rate": 3.08332057659115e-05, "loss": 0.7235553860664368, "step": 7304 }, { "epoch": 8.963190184049079, "grad_norm": 0.2947096824645996, "learning_rate": 3.082832502702634e-05, "loss": 0.6994928121566772, "step": 7305 }, { "epoch": 8.964417177914111, "grad_norm": 0.28281518816947937, "learning_rate": 3.082344405321802e-05, "loss": 0.8008790016174316, "step": 7306 }, { "epoch": 8.965644171779141, "grad_norm": 0.26136744022369385, "learning_rate": 3.0818562844683284e-05, "loss": 0.9035662412643433, "step": 7307 }, { "epoch": 8.966871165644172, "grad_norm": 0.2563462555408478, "learning_rate": 3.0813681401618863e-05, "loss": 0.6666399240493774, "step": 7308 }, { "epoch": 8.968098159509202, "grad_norm": 0.28838106989860535, "learning_rate": 3.080879972422154e-05, "loss": 0.7922935485839844, "step": 7309 }, { "epoch": 8.969325153374234, "grad_norm": 0.28418055176734924, "learning_rate": 3.080391781268806e-05, "loss": 0.5610670447349548, "step": 7310 }, { "epoch": 8.970552147239264, "grad_norm": 0.34856876730918884, "learning_rate": 3.079903566721521e-05, "loss": 0.6294105648994446, "step": 7311 }, { "epoch": 8.971779141104294, "grad_norm": 0.39937618374824524, "learning_rate": 3.079415328799977e-05, "loss": 0.41681286692619324, "step": 7312 }, { "epoch": 8.973006134969324, "grad_norm": 0.20373141765594482, "learning_rate": 3.0789270675238536e-05, "loss": 0.715699315071106, "step": 7313 }, { "epoch": 8.974233128834356, "grad_norm": 0.26311415433883667, "learning_rate": 3.078438782912831e-05, "loss": 0.6322806477546692, "step": 7314 }, { "epoch": 8.975460122699387, "grad_norm": 0.2492835372686386, "learning_rate": 3.077950474986592e-05, "loss": 0.9739705324172974, "step": 7315 }, { "epoch": 8.976687116564417, "grad_norm": 0.26436465978622437, "learning_rate": 3.077462143764816e-05, "loss": 0.6382639408111572, "step": 7316 }, { "epoch": 8.977914110429447, "grad_norm": 0.28496628999710083, "learning_rate": 3.076973789267189e-05, "loss": 0.9624109864234924, "step": 7317 }, { "epoch": 8.979141104294479, "grad_norm": 0.3069506585597992, "learning_rate": 3.076485411513394e-05, "loss": 0.5135484933853149, "step": 7318 }, { "epoch": 8.98036809815951, "grad_norm": 0.2513585686683655, "learning_rate": 3.075997010523116e-05, "loss": 0.7843173146247864, "step": 7319 }, { "epoch": 8.98159509202454, "grad_norm": 0.3510933518409729, "learning_rate": 3.075508586316041e-05, "loss": 0.5089659690856934, "step": 7320 }, { "epoch": 8.98282208588957, "grad_norm": 0.349204421043396, "learning_rate": 3.075020138911857e-05, "loss": 0.4953095316886902, "step": 7321 }, { "epoch": 8.984049079754602, "grad_norm": 0.3189634084701538, "learning_rate": 3.074531668330252e-05, "loss": 0.6389802694320679, "step": 7322 }, { "epoch": 8.985276073619632, "grad_norm": 0.2711166739463806, "learning_rate": 3.074043174590912e-05, "loss": 0.6146373152732849, "step": 7323 }, { "epoch": 8.986503067484662, "grad_norm": 0.27088162302970886, "learning_rate": 3.07355465771353e-05, "loss": 0.6436272859573364, "step": 7324 }, { "epoch": 8.987730061349692, "grad_norm": 0.31947091221809387, "learning_rate": 3.0730661177177954e-05, "loss": 0.5947800278663635, "step": 7325 }, { "epoch": 8.988957055214724, "grad_norm": 0.23248958587646484, "learning_rate": 3.0725775546234005e-05, "loss": 0.7690503597259521, "step": 7326 }, { "epoch": 8.990184049079755, "grad_norm": 0.26173365116119385, "learning_rate": 3.0720889684500365e-05, "loss": 0.7476907968521118, "step": 7327 }, { "epoch": 8.991411042944785, "grad_norm": 0.27650463581085205, "learning_rate": 3.071600359217398e-05, "loss": 0.7073291540145874, "step": 7328 }, { "epoch": 8.992638036809815, "grad_norm": 0.24832595884799957, "learning_rate": 3.07111172694518e-05, "loss": 0.8005919456481934, "step": 7329 }, { "epoch": 8.993865030674847, "grad_norm": 0.24979518353939056, "learning_rate": 3.0706230716530774e-05, "loss": 0.7658047676086426, "step": 7330 }, { "epoch": 8.995092024539877, "grad_norm": 0.26696738600730896, "learning_rate": 3.0701343933607854e-05, "loss": 0.6447327136993408, "step": 7331 }, { "epoch": 8.996319018404908, "grad_norm": 0.31603097915649414, "learning_rate": 3.0696456920880024e-05, "loss": 0.5898654460906982, "step": 7332 }, { "epoch": 8.99754601226994, "grad_norm": 0.298206627368927, "learning_rate": 3.0691569678544255e-05, "loss": 0.703490138053894, "step": 7333 }, { "epoch": 8.99877300613497, "grad_norm": 0.26088619232177734, "learning_rate": 3.0686682206797554e-05, "loss": 0.6957827210426331, "step": 7334 }, { "epoch": 9.0, "grad_norm": 0.2591802477836609, "learning_rate": 3.068179450583691e-05, "loss": 0.8670968413352966, "step": 7335 }, { "epoch": 9.00122699386503, "grad_norm": 0.296121209859848, "learning_rate": 3.0676906575859334e-05, "loss": 0.6763209104537964, "step": 7336 }, { "epoch": 9.002453987730062, "grad_norm": 0.2589518129825592, "learning_rate": 3.067201841706185e-05, "loss": 0.7453877925872803, "step": 7337 }, { "epoch": 9.003680981595092, "grad_norm": 0.2736881971359253, "learning_rate": 3.066713002964147e-05, "loss": 0.6441117525100708, "step": 7338 }, { "epoch": 9.004907975460123, "grad_norm": 0.2469881922006607, "learning_rate": 3.0662241413795244e-05, "loss": 0.72698974609375, "step": 7339 }, { "epoch": 9.006134969325153, "grad_norm": 0.27960699796676636, "learning_rate": 3.065735256972022e-05, "loss": 0.44864320755004883, "step": 7340 }, { "epoch": 9.007361963190185, "grad_norm": 0.3336980640888214, "learning_rate": 3.065246349761345e-05, "loss": 0.45097672939300537, "step": 7341 }, { "epoch": 9.008588957055215, "grad_norm": 0.3134456276893616, "learning_rate": 3.0647574197671994e-05, "loss": 0.580527663230896, "step": 7342 }, { "epoch": 9.009815950920245, "grad_norm": 0.22388741374015808, "learning_rate": 3.064268467009293e-05, "loss": 0.6545014381408691, "step": 7343 }, { "epoch": 9.011042944785276, "grad_norm": 0.23912513256072998, "learning_rate": 3.0637794915073346e-05, "loss": 0.7124319076538086, "step": 7344 }, { "epoch": 9.012269938650308, "grad_norm": 0.19965597987174988, "learning_rate": 3.063290493281033e-05, "loss": 0.7836528420448303, "step": 7345 }, { "epoch": 9.013496932515338, "grad_norm": 0.29138725996017456, "learning_rate": 3.0628014723500975e-05, "loss": 0.6138989329338074, "step": 7346 }, { "epoch": 9.014723926380368, "grad_norm": 0.27741360664367676, "learning_rate": 3.0623124287342396e-05, "loss": 0.6285533905029297, "step": 7347 }, { "epoch": 9.015950920245398, "grad_norm": 0.35400936007499695, "learning_rate": 3.061823362453172e-05, "loss": 0.4821036756038666, "step": 7348 }, { "epoch": 9.01717791411043, "grad_norm": 0.24968111515045166, "learning_rate": 3.061334273526606e-05, "loss": 0.5761618614196777, "step": 7349 }, { "epoch": 9.01840490797546, "grad_norm": 0.23316505551338196, "learning_rate": 3.0608451619742575e-05, "loss": 0.581200361251831, "step": 7350 }, { "epoch": 9.01963190184049, "grad_norm": 0.2446000725030899, "learning_rate": 3.06035602781584e-05, "loss": 0.5596803426742554, "step": 7351 }, { "epoch": 9.020858895705521, "grad_norm": 0.29683399200439453, "learning_rate": 3.059866871071069e-05, "loss": 0.5139368176460266, "step": 7352 }, { "epoch": 9.022085889570553, "grad_norm": 0.2589855492115021, "learning_rate": 3.059377691759661e-05, "loss": 0.6494589447975159, "step": 7353 }, { "epoch": 9.023312883435583, "grad_norm": 0.24144507944583893, "learning_rate": 3.058888489901334e-05, "loss": 0.8044906258583069, "step": 7354 }, { "epoch": 9.024539877300613, "grad_norm": 0.26368990540504456, "learning_rate": 3.058399265515806e-05, "loss": 0.7187401056289673, "step": 7355 }, { "epoch": 9.025766871165644, "grad_norm": 0.25060102343559265, "learning_rate": 3.057910018622796e-05, "loss": 0.4900599718093872, "step": 7356 }, { "epoch": 9.026993865030676, "grad_norm": 0.2672537863254547, "learning_rate": 3.057420749242024e-05, "loss": 0.7498528957366943, "step": 7357 }, { "epoch": 9.028220858895706, "grad_norm": 0.2515202462673187, "learning_rate": 3.056931457393212e-05, "loss": 0.4622898995876312, "step": 7358 }, { "epoch": 9.029447852760736, "grad_norm": 0.26733341813087463, "learning_rate": 3.0564421430960815e-05, "loss": 0.6446418762207031, "step": 7359 }, { "epoch": 9.030674846625766, "grad_norm": 0.2251073569059372, "learning_rate": 3.0559528063703546e-05, "loss": 0.7053642272949219, "step": 7360 }, { "epoch": 9.031901840490798, "grad_norm": 0.29026859998703003, "learning_rate": 3.0554634472357566e-05, "loss": 0.7607076168060303, "step": 7361 }, { "epoch": 9.033128834355828, "grad_norm": 0.3442672789096832, "learning_rate": 3.05497406571201e-05, "loss": 0.6273346543312073, "step": 7362 }, { "epoch": 9.034355828220859, "grad_norm": 0.24220526218414307, "learning_rate": 3.054484661818843e-05, "loss": 0.7920604348182678, "step": 7363 }, { "epoch": 9.035582822085889, "grad_norm": 0.32427677512168884, "learning_rate": 3.053995235575979e-05, "loss": 0.7603458166122437, "step": 7364 }, { "epoch": 9.036809815950921, "grad_norm": 0.2800743281841278, "learning_rate": 3.053505787003149e-05, "loss": 0.7642684578895569, "step": 7365 }, { "epoch": 9.038036809815951, "grad_norm": 0.23615889251232147, "learning_rate": 3.0530163161200785e-05, "loss": 0.7021688222885132, "step": 7366 }, { "epoch": 9.039263803680981, "grad_norm": 0.3288448750972748, "learning_rate": 3.0525268229464976e-05, "loss": 0.7993475198745728, "step": 7367 }, { "epoch": 9.040490797546012, "grad_norm": 0.28221607208251953, "learning_rate": 3.052037307502136e-05, "loss": 0.7530108690261841, "step": 7368 }, { "epoch": 9.041717791411044, "grad_norm": 0.32157620787620544, "learning_rate": 3.051547769806726e-05, "loss": 0.7060315608978271, "step": 7369 }, { "epoch": 9.042944785276074, "grad_norm": 0.3007447123527527, "learning_rate": 3.0510582098799972e-05, "loss": 0.4324828088283539, "step": 7370 }, { "epoch": 9.044171779141104, "grad_norm": 0.26198610663414, "learning_rate": 3.0505686277416846e-05, "loss": 0.6505955457687378, "step": 7371 }, { "epoch": 9.045398773006134, "grad_norm": 0.26572951674461365, "learning_rate": 3.05007902341152e-05, "loss": 0.5829328298568726, "step": 7372 }, { "epoch": 9.046625766871166, "grad_norm": 0.2360904961824417, "learning_rate": 3.0495893969092392e-05, "loss": 0.7105219960212708, "step": 7373 }, { "epoch": 9.047852760736196, "grad_norm": 0.23332954943180084, "learning_rate": 3.0490997482545775e-05, "loss": 0.6318018436431885, "step": 7374 }, { "epoch": 9.049079754601227, "grad_norm": 0.23576273024082184, "learning_rate": 3.0486100774672714e-05, "loss": 0.68918776512146, "step": 7375 }, { "epoch": 9.050306748466257, "grad_norm": 0.2700045704841614, "learning_rate": 3.048120384567057e-05, "loss": 0.6445444822311401, "step": 7376 }, { "epoch": 9.051533742331289, "grad_norm": 0.2293892800807953, "learning_rate": 3.0476306695736733e-05, "loss": 0.6711151599884033, "step": 7377 }, { "epoch": 9.05276073619632, "grad_norm": 0.2288317233324051, "learning_rate": 3.04714093250686e-05, "loss": 0.9386826753616333, "step": 7378 }, { "epoch": 9.05398773006135, "grad_norm": 0.2947820723056793, "learning_rate": 3.0466511733863556e-05, "loss": 0.7054396271705627, "step": 7379 }, { "epoch": 9.05521472392638, "grad_norm": 0.29466837644577026, "learning_rate": 3.0461613922319017e-05, "loss": 0.622234582901001, "step": 7380 }, { "epoch": 9.056441717791412, "grad_norm": 0.2628059983253479, "learning_rate": 3.0456715890632393e-05, "loss": 0.6884297132492065, "step": 7381 }, { "epoch": 9.057668711656442, "grad_norm": 0.23712894320487976, "learning_rate": 3.045181763900112e-05, "loss": 0.7185887098312378, "step": 7382 }, { "epoch": 9.058895705521472, "grad_norm": 0.3338772654533386, "learning_rate": 3.0446919167622616e-05, "loss": 0.8875395059585571, "step": 7383 }, { "epoch": 9.060122699386502, "grad_norm": 0.34338584542274475, "learning_rate": 3.0442020476694343e-05, "loss": 0.7231480479240417, "step": 7384 }, { "epoch": 9.061349693251534, "grad_norm": 0.24539610743522644, "learning_rate": 3.0437121566413752e-05, "loss": 0.7466060519218445, "step": 7385 }, { "epoch": 9.062576687116565, "grad_norm": 0.2508712708950043, "learning_rate": 3.043222243697829e-05, "loss": 0.6429874897003174, "step": 7386 }, { "epoch": 9.063803680981595, "grad_norm": 1.8327422142028809, "learning_rate": 3.0427323088585442e-05, "loss": 0.8325308561325073, "step": 7387 }, { "epoch": 9.065030674846625, "grad_norm": 0.24255503714084625, "learning_rate": 3.0422423521432674e-05, "loss": 0.6996567249298096, "step": 7388 }, { "epoch": 9.066257668711657, "grad_norm": 0.28932225704193115, "learning_rate": 3.0417523735717478e-05, "loss": 0.7913018465042114, "step": 7389 }, { "epoch": 9.067484662576687, "grad_norm": 0.29147255420684814, "learning_rate": 3.0412623731637357e-05, "loss": 0.6432342529296875, "step": 7390 }, { "epoch": 9.068711656441717, "grad_norm": 0.3227844834327698, "learning_rate": 3.0407723509389802e-05, "loss": 0.5671402215957642, "step": 7391 }, { "epoch": 9.069938650306748, "grad_norm": 0.28567641973495483, "learning_rate": 3.040282306917235e-05, "loss": 0.6890019178390503, "step": 7392 }, { "epoch": 9.07116564417178, "grad_norm": 0.3241347670555115, "learning_rate": 3.0397922411182494e-05, "loss": 0.5300353765487671, "step": 7393 }, { "epoch": 9.07239263803681, "grad_norm": 0.27165085077285767, "learning_rate": 3.0393021535617784e-05, "loss": 0.6163440346717834, "step": 7394 }, { "epoch": 9.07361963190184, "grad_norm": 0.3439067304134369, "learning_rate": 3.0388120442675762e-05, "loss": 0.5360430479049683, "step": 7395 }, { "epoch": 9.07484662576687, "grad_norm": 0.27144134044647217, "learning_rate": 3.0383219132553964e-05, "loss": 0.7084920406341553, "step": 7396 }, { "epoch": 9.076073619631902, "grad_norm": 0.2751435935497284, "learning_rate": 3.037831760544997e-05, "loss": 0.6713666915893555, "step": 7397 }, { "epoch": 9.077300613496933, "grad_norm": 0.23014317452907562, "learning_rate": 3.037341586156132e-05, "loss": 0.8245587348937988, "step": 7398 }, { "epoch": 9.078527607361963, "grad_norm": 0.2970421612262726, "learning_rate": 3.0368513901085614e-05, "loss": 0.6989257335662842, "step": 7399 }, { "epoch": 9.079754601226995, "grad_norm": 0.3034706115722656, "learning_rate": 3.0363611724220416e-05, "loss": 0.47356605529785156, "step": 7400 }, { "epoch": 9.080981595092025, "grad_norm": 0.2845655083656311, "learning_rate": 3.0358709331163325e-05, "loss": 0.5847431421279907, "step": 7401 }, { "epoch": 9.082208588957055, "grad_norm": 0.31050607562065125, "learning_rate": 3.0353806722111956e-05, "loss": 0.4766558110713959, "step": 7402 }, { "epoch": 9.083435582822085, "grad_norm": 0.29955437779426575, "learning_rate": 3.0348903897263893e-05, "loss": 0.6670199632644653, "step": 7403 }, { "epoch": 9.084662576687117, "grad_norm": 0.2591475248336792, "learning_rate": 3.034400085681678e-05, "loss": 0.5637373924255371, "step": 7404 }, { "epoch": 9.085889570552148, "grad_norm": 0.2563033103942871, "learning_rate": 3.0339097600968223e-05, "loss": 0.4265803098678589, "step": 7405 }, { "epoch": 9.087116564417178, "grad_norm": 0.2358119636774063, "learning_rate": 3.0334194129915883e-05, "loss": 0.5926374793052673, "step": 7406 }, { "epoch": 9.088343558282208, "grad_norm": 0.2582581639289856, "learning_rate": 3.0329290443857384e-05, "loss": 0.7824609279632568, "step": 7407 }, { "epoch": 9.08957055214724, "grad_norm": 0.3462428152561188, "learning_rate": 3.0324386542990378e-05, "loss": 0.5491584539413452, "step": 7408 }, { "epoch": 9.09079754601227, "grad_norm": 0.3165333867073059, "learning_rate": 3.0319482427512552e-05, "loss": 0.5206329226493835, "step": 7409 }, { "epoch": 9.0920245398773, "grad_norm": 0.2538965046405792, "learning_rate": 3.0314578097621553e-05, "loss": 0.5857971906661987, "step": 7410 }, { "epoch": 9.09325153374233, "grad_norm": 0.28573569655418396, "learning_rate": 3.0309673553515074e-05, "loss": 0.5875896215438843, "step": 7411 }, { "epoch": 9.094478527607363, "grad_norm": 0.2610485553741455, "learning_rate": 3.0304768795390793e-05, "loss": 0.4445989727973938, "step": 7412 }, { "epoch": 9.095705521472393, "grad_norm": 0.2425917387008667, "learning_rate": 3.029986382344641e-05, "loss": 0.5475727319717407, "step": 7413 }, { "epoch": 9.096932515337423, "grad_norm": 0.273100346326828, "learning_rate": 3.029495863787964e-05, "loss": 0.5463051795959473, "step": 7414 }, { "epoch": 9.098159509202453, "grad_norm": 0.2379591017961502, "learning_rate": 3.029005323888818e-05, "loss": 0.6428968906402588, "step": 7415 }, { "epoch": 9.099386503067485, "grad_norm": 0.28011757135391235, "learning_rate": 3.0285147626669765e-05, "loss": 0.572023332118988, "step": 7416 }, { "epoch": 9.100613496932516, "grad_norm": 0.2669103443622589, "learning_rate": 3.0280241801422114e-05, "loss": 0.639069676399231, "step": 7417 }, { "epoch": 9.101840490797546, "grad_norm": 0.25720563530921936, "learning_rate": 3.0275335763342992e-05, "loss": 0.7976073622703552, "step": 7418 }, { "epoch": 9.103067484662576, "grad_norm": 0.2658868730068207, "learning_rate": 3.0270429512630123e-05, "loss": 0.7148659229278564, "step": 7419 }, { "epoch": 9.104294478527608, "grad_norm": 0.3280923366546631, "learning_rate": 3.026552304948127e-05, "loss": 0.6497167348861694, "step": 7420 }, { "epoch": 9.105521472392638, "grad_norm": 0.2812238931655884, "learning_rate": 3.026061637409421e-05, "loss": 0.6178398728370667, "step": 7421 }, { "epoch": 9.106748466257669, "grad_norm": 0.28170081973075867, "learning_rate": 3.0255709486666693e-05, "loss": 0.5342304706573486, "step": 7422 }, { "epoch": 9.107975460122699, "grad_norm": 0.34692150354385376, "learning_rate": 3.0250802387396526e-05, "loss": 0.6609295606613159, "step": 7423 }, { "epoch": 9.10920245398773, "grad_norm": 0.31554919481277466, "learning_rate": 3.024589507648149e-05, "loss": 0.5817093253135681, "step": 7424 }, { "epoch": 9.110429447852761, "grad_norm": 0.24757356941699982, "learning_rate": 3.024098755411938e-05, "loss": 0.7836809158325195, "step": 7425 }, { "epoch": 9.111656441717791, "grad_norm": 0.2482835352420807, "learning_rate": 3.0236079820508022e-05, "loss": 0.7241526246070862, "step": 7426 }, { "epoch": 9.112883435582821, "grad_norm": 0.31629306077957153, "learning_rate": 3.0231171875845203e-05, "loss": 0.5359227657318115, "step": 7427 }, { "epoch": 9.114110429447853, "grad_norm": 0.24608111381530762, "learning_rate": 3.022626372032878e-05, "loss": 0.6311566829681396, "step": 7428 }, { "epoch": 9.115337423312884, "grad_norm": 0.23309721052646637, "learning_rate": 3.022135535415656e-05, "loss": 0.8308299779891968, "step": 7429 }, { "epoch": 9.116564417177914, "grad_norm": 0.2960256040096283, "learning_rate": 3.0216446777526403e-05, "loss": 0.7008530497550964, "step": 7430 }, { "epoch": 9.117791411042944, "grad_norm": 0.31650790572166443, "learning_rate": 3.0211537990636157e-05, "loss": 0.5633217692375183, "step": 7431 }, { "epoch": 9.119018404907976, "grad_norm": 0.2600705623626709, "learning_rate": 3.0206628993683678e-05, "loss": 0.6870869994163513, "step": 7432 }, { "epoch": 9.120245398773006, "grad_norm": 0.2617870271205902, "learning_rate": 3.0201719786866833e-05, "loss": 0.6060721278190613, "step": 7433 }, { "epoch": 9.121472392638037, "grad_norm": 0.24846133589744568, "learning_rate": 3.01968103703835e-05, "loss": 0.6779005527496338, "step": 7434 }, { "epoch": 9.122699386503067, "grad_norm": 0.25523385405540466, "learning_rate": 3.0191900744431556e-05, "loss": 0.6177393198013306, "step": 7435 }, { "epoch": 9.123926380368099, "grad_norm": 0.2827543020248413, "learning_rate": 3.0186990909208913e-05, "loss": 0.613064706325531, "step": 7436 }, { "epoch": 9.125153374233129, "grad_norm": 0.2682885527610779, "learning_rate": 3.0182080864913452e-05, "loss": 0.7453779578208923, "step": 7437 }, { "epoch": 9.12638036809816, "grad_norm": 0.2972446084022522, "learning_rate": 3.017717061174309e-05, "loss": 0.6715078949928284, "step": 7438 }, { "epoch": 9.12760736196319, "grad_norm": 0.3281534016132355, "learning_rate": 3.0172260149895748e-05, "loss": 0.5122497081756592, "step": 7439 }, { "epoch": 9.128834355828221, "grad_norm": 0.2607539892196655, "learning_rate": 3.0167349479569352e-05, "loss": 0.53836989402771, "step": 7440 }, { "epoch": 9.130061349693252, "grad_norm": 0.23351851105690002, "learning_rate": 3.0162438600961834e-05, "loss": 0.7047096490859985, "step": 7441 }, { "epoch": 9.131288343558282, "grad_norm": 0.25169000029563904, "learning_rate": 3.0157527514271146e-05, "loss": 0.44990143179893494, "step": 7442 }, { "epoch": 9.132515337423312, "grad_norm": 0.3532118499279022, "learning_rate": 3.0152616219695227e-05, "loss": 0.5653449296951294, "step": 7443 }, { "epoch": 9.133742331288344, "grad_norm": 0.23850615322589874, "learning_rate": 3.0147704717432045e-05, "loss": 0.7019792795181274, "step": 7444 }, { "epoch": 9.134969325153374, "grad_norm": 0.25233522057533264, "learning_rate": 3.014279300767957e-05, "loss": 0.7077746391296387, "step": 7445 }, { "epoch": 9.136196319018405, "grad_norm": 0.25753259658813477, "learning_rate": 3.0137881090635778e-05, "loss": 0.5733458399772644, "step": 7446 }, { "epoch": 9.137423312883435, "grad_norm": 0.2539222538471222, "learning_rate": 3.0132968966498647e-05, "loss": 0.8266599178314209, "step": 7447 }, { "epoch": 9.138650306748467, "grad_norm": 0.25558194518089294, "learning_rate": 3.0128056635466184e-05, "loss": 0.603582501411438, "step": 7448 }, { "epoch": 9.139877300613497, "grad_norm": 0.2655004560947418, "learning_rate": 3.012314409773637e-05, "loss": 0.6493314504623413, "step": 7449 }, { "epoch": 9.141104294478527, "grad_norm": 0.2056698054075241, "learning_rate": 3.0118231353507243e-05, "loss": 0.6936132311820984, "step": 7450 }, { "epoch": 9.142331288343557, "grad_norm": 0.24070613086223602, "learning_rate": 3.0113318402976803e-05, "loss": 0.6608980298042297, "step": 7451 }, { "epoch": 9.14355828220859, "grad_norm": 0.31415802240371704, "learning_rate": 3.0108405246343085e-05, "loss": 0.7088459134101868, "step": 7452 }, { "epoch": 9.14478527607362, "grad_norm": 0.3445816934108734, "learning_rate": 3.010349188380412e-05, "loss": 0.686359167098999, "step": 7453 }, { "epoch": 9.14601226993865, "grad_norm": 0.3154098689556122, "learning_rate": 3.0098578315557947e-05, "loss": 0.6396521925926208, "step": 7454 }, { "epoch": 9.14723926380368, "grad_norm": 0.23053133487701416, "learning_rate": 3.0093664541802637e-05, "loss": 0.8652116060256958, "step": 7455 }, { "epoch": 9.148466257668712, "grad_norm": 0.21175000071525574, "learning_rate": 3.0088750562736224e-05, "loss": 0.6729369163513184, "step": 7456 }, { "epoch": 9.149693251533742, "grad_norm": 0.3049505054950714, "learning_rate": 3.0083836378556795e-05, "loss": 0.6238043308258057, "step": 7457 }, { "epoch": 9.150920245398773, "grad_norm": 0.31271907687187195, "learning_rate": 3.0078921989462417e-05, "loss": 0.6198495030403137, "step": 7458 }, { "epoch": 9.152147239263805, "grad_norm": 0.31908756494522095, "learning_rate": 3.0074007395651183e-05, "loss": 0.48177653551101685, "step": 7459 }, { "epoch": 9.153374233128835, "grad_norm": 0.24665771424770355, "learning_rate": 3.006909259732118e-05, "loss": 0.6108826398849487, "step": 7460 }, { "epoch": 9.154601226993865, "grad_norm": 0.32141777873039246, "learning_rate": 3.0064177594670505e-05, "loss": 0.5387704372406006, "step": 7461 }, { "epoch": 9.155828220858895, "grad_norm": 0.29194578528404236, "learning_rate": 3.0059262387897293e-05, "loss": 0.8732141256332397, "step": 7462 }, { "epoch": 9.157055214723927, "grad_norm": 0.3590603470802307, "learning_rate": 3.0054346977199628e-05, "loss": 0.6625794172286987, "step": 7463 }, { "epoch": 9.158282208588957, "grad_norm": 0.27325525879859924, "learning_rate": 3.004943136277566e-05, "loss": 0.6204054951667786, "step": 7464 }, { "epoch": 9.159509202453988, "grad_norm": 0.34949663281440735, "learning_rate": 3.004451554482351e-05, "loss": 0.5236057639122009, "step": 7465 }, { "epoch": 9.160736196319018, "grad_norm": 0.26044923067092896, "learning_rate": 3.0039599523541327e-05, "loss": 0.6572288274765015, "step": 7466 }, { "epoch": 9.16196319018405, "grad_norm": 0.24770839512348175, "learning_rate": 3.0034683299127265e-05, "loss": 0.7802344560623169, "step": 7467 }, { "epoch": 9.16319018404908, "grad_norm": 0.25209134817123413, "learning_rate": 3.0029766871779474e-05, "loss": 0.6126032471656799, "step": 7468 }, { "epoch": 9.16441717791411, "grad_norm": 0.2521890699863434, "learning_rate": 3.002485024169613e-05, "loss": 0.7192068099975586, "step": 7469 }, { "epoch": 9.16564417177914, "grad_norm": 0.2992079257965088, "learning_rate": 3.0019933409075396e-05, "loss": 0.764167845249176, "step": 7470 }, { "epoch": 9.166871165644173, "grad_norm": 0.36328399181365967, "learning_rate": 3.001501637411547e-05, "loss": 0.6116616129875183, "step": 7471 }, { "epoch": 9.168098159509203, "grad_norm": 0.28493407368659973, "learning_rate": 3.0010099137014535e-05, "loss": 0.6473956108093262, "step": 7472 }, { "epoch": 9.169325153374233, "grad_norm": 0.23843032121658325, "learning_rate": 3.000518169797079e-05, "loss": 0.7984029054641724, "step": 7473 }, { "epoch": 9.170552147239263, "grad_norm": 0.275706022977829, "learning_rate": 3.0000264057182452e-05, "loss": 0.7644747495651245, "step": 7474 }, { "epoch": 9.171779141104295, "grad_norm": 0.2212522327899933, "learning_rate": 2.999534621484773e-05, "loss": 0.6593260765075684, "step": 7475 }, { "epoch": 9.173006134969325, "grad_norm": 0.2611613869667053, "learning_rate": 2.999042817116484e-05, "loss": 0.6876543164253235, "step": 7476 }, { "epoch": 9.174233128834356, "grad_norm": 0.26266348361968994, "learning_rate": 2.998550992633204e-05, "loss": 0.5700741410255432, "step": 7477 }, { "epoch": 9.175460122699386, "grad_norm": 0.23435449600219727, "learning_rate": 2.9980591480547542e-05, "loss": 0.6670102477073669, "step": 7478 }, { "epoch": 9.176687116564418, "grad_norm": 0.26116645336151123, "learning_rate": 2.9975672834009618e-05, "loss": 0.4054142236709595, "step": 7479 }, { "epoch": 9.177914110429448, "grad_norm": 0.31158143281936646, "learning_rate": 2.99707539869165e-05, "loss": 0.5973870754241943, "step": 7480 }, { "epoch": 9.179141104294478, "grad_norm": 0.3949293792247772, "learning_rate": 2.9965834939466468e-05, "loss": 0.42133429646492004, "step": 7481 }, { "epoch": 9.180368098159509, "grad_norm": 0.26655474305152893, "learning_rate": 2.99609156918578e-05, "loss": 0.6874210834503174, "step": 7482 }, { "epoch": 9.18159509202454, "grad_norm": 0.2805722951889038, "learning_rate": 2.995599624428876e-05, "loss": 0.7574355602264404, "step": 7483 }, { "epoch": 9.18282208588957, "grad_norm": 0.33196115493774414, "learning_rate": 2.9951076596957657e-05, "loss": 0.7611243724822998, "step": 7484 }, { "epoch": 9.184049079754601, "grad_norm": 0.2890527844429016, "learning_rate": 2.994615675006277e-05, "loss": 0.5889701843261719, "step": 7485 }, { "epoch": 9.185276073619631, "grad_norm": 0.2713554799556732, "learning_rate": 2.9941236703802413e-05, "loss": 0.6962018609046936, "step": 7486 }, { "epoch": 9.186503067484663, "grad_norm": 0.3107219338417053, "learning_rate": 2.9936316458374895e-05, "loss": 0.5884355306625366, "step": 7487 }, { "epoch": 9.187730061349694, "grad_norm": 0.30575719475746155, "learning_rate": 2.9931396013978543e-05, "loss": 0.590141236782074, "step": 7488 }, { "epoch": 9.188957055214724, "grad_norm": 0.30686840415000916, "learning_rate": 2.9926475370811685e-05, "loss": 0.6328819990158081, "step": 7489 }, { "epoch": 9.190184049079754, "grad_norm": 0.27058783173561096, "learning_rate": 2.9921554529072648e-05, "loss": 0.5867058038711548, "step": 7490 }, { "epoch": 9.191411042944786, "grad_norm": 0.22327743470668793, "learning_rate": 2.9916633488959795e-05, "loss": 0.8119461536407471, "step": 7491 }, { "epoch": 9.192638036809816, "grad_norm": 0.2599509656429291, "learning_rate": 2.991171225067146e-05, "loss": 0.9014034867286682, "step": 7492 }, { "epoch": 9.193865030674846, "grad_norm": 0.2555895149707794, "learning_rate": 2.990679081440601e-05, "loss": 0.8600993156433105, "step": 7493 }, { "epoch": 9.195092024539877, "grad_norm": 0.3258333206176758, "learning_rate": 2.990186918036182e-05, "loss": 0.5757613182067871, "step": 7494 }, { "epoch": 9.196319018404909, "grad_norm": 0.2686983048915863, "learning_rate": 2.9896947348737264e-05, "loss": 0.6638908386230469, "step": 7495 }, { "epoch": 9.197546012269939, "grad_norm": 0.2558249533176422, "learning_rate": 2.9892025319730732e-05, "loss": 0.6477856636047363, "step": 7496 }, { "epoch": 9.198773006134969, "grad_norm": 0.3713017702102661, "learning_rate": 2.988710309354061e-05, "loss": 0.45661383867263794, "step": 7497 }, { "epoch": 9.2, "grad_norm": 0.22509022057056427, "learning_rate": 2.9882180670365302e-05, "loss": 0.7678459286689758, "step": 7498 }, { "epoch": 9.201226993865031, "grad_norm": 0.25444525480270386, "learning_rate": 2.9877258050403212e-05, "loss": 0.6754938364028931, "step": 7499 }, { "epoch": 9.202453987730062, "grad_norm": 0.21937522292137146, "learning_rate": 2.9872335233852753e-05, "loss": 0.7130722999572754, "step": 7500 }, { "epoch": 9.203680981595092, "grad_norm": 0.3296429514884949, "learning_rate": 2.9867412220912373e-05, "loss": 0.47471854090690613, "step": 7501 }, { "epoch": 9.204907975460122, "grad_norm": 0.2539350688457489, "learning_rate": 2.986248901178048e-05, "loss": 0.621302604675293, "step": 7502 }, { "epoch": 9.206134969325154, "grad_norm": 0.3025975227355957, "learning_rate": 2.9857565606655525e-05, "loss": 0.6851388812065125, "step": 7503 }, { "epoch": 9.207361963190184, "grad_norm": 0.24654842913150787, "learning_rate": 2.9852642005735952e-05, "loss": 0.8002132177352905, "step": 7504 }, { "epoch": 9.208588957055214, "grad_norm": 0.27498549222946167, "learning_rate": 2.9847718209220223e-05, "loss": 0.653945803642273, "step": 7505 }, { "epoch": 9.209815950920245, "grad_norm": 0.2690621018409729, "learning_rate": 2.9842794217306796e-05, "loss": 0.6254643797874451, "step": 7506 }, { "epoch": 9.211042944785277, "grad_norm": 0.308894544839859, "learning_rate": 2.9837870030194142e-05, "loss": 0.4211542010307312, "step": 7507 }, { "epoch": 9.212269938650307, "grad_norm": 0.26634684205055237, "learning_rate": 2.9832945648080758e-05, "loss": 0.579250156879425, "step": 7508 }, { "epoch": 9.213496932515337, "grad_norm": 0.3636866807937622, "learning_rate": 2.9828021071165107e-05, "loss": 0.5857384204864502, "step": 7509 }, { "epoch": 9.214723926380367, "grad_norm": 0.31991586089134216, "learning_rate": 2.982309629964571e-05, "loss": 0.5316058397293091, "step": 7510 }, { "epoch": 9.2159509202454, "grad_norm": 0.25709205865859985, "learning_rate": 2.9818171333721046e-05, "loss": 0.7719056606292725, "step": 7511 }, { "epoch": 9.21717791411043, "grad_norm": 0.27563905715942383, "learning_rate": 2.9813246173589638e-05, "loss": 0.7248673439025879, "step": 7512 }, { "epoch": 9.21840490797546, "grad_norm": 0.23976576328277588, "learning_rate": 2.980832081945001e-05, "loss": 0.6217837333679199, "step": 7513 }, { "epoch": 9.21963190184049, "grad_norm": 0.2763012647628784, "learning_rate": 2.980339527150068e-05, "loss": 0.45429331064224243, "step": 7514 }, { "epoch": 9.220858895705522, "grad_norm": 0.3483484089374542, "learning_rate": 2.9798469529940188e-05, "loss": 0.5901687145233154, "step": 7515 }, { "epoch": 9.222085889570552, "grad_norm": 0.2635708451271057, "learning_rate": 2.9793543594967065e-05, "loss": 0.9233123064041138, "step": 7516 }, { "epoch": 9.223312883435582, "grad_norm": 0.25603383779525757, "learning_rate": 2.9788617466779882e-05, "loss": 0.7522091269493103, "step": 7517 }, { "epoch": 9.224539877300613, "grad_norm": 0.23955532908439636, "learning_rate": 2.9783691145577187e-05, "loss": 0.6741703748703003, "step": 7518 }, { "epoch": 9.225766871165645, "grad_norm": 0.3064340651035309, "learning_rate": 2.977876463155754e-05, "loss": 0.5565896034240723, "step": 7519 }, { "epoch": 9.226993865030675, "grad_norm": 0.29353711009025574, "learning_rate": 2.9773837924919525e-05, "loss": 0.642641544342041, "step": 7520 }, { "epoch": 9.228220858895705, "grad_norm": 0.2845744490623474, "learning_rate": 2.976891102586171e-05, "loss": 0.7734159231185913, "step": 7521 }, { "epoch": 9.229447852760735, "grad_norm": 0.2844148576259613, "learning_rate": 2.9763983934582708e-05, "loss": 0.8177595138549805, "step": 7522 }, { "epoch": 9.230674846625767, "grad_norm": 0.24549691379070282, "learning_rate": 2.9759056651281086e-05, "loss": 0.7369805574417114, "step": 7523 }, { "epoch": 9.231901840490798, "grad_norm": 0.19116060435771942, "learning_rate": 2.975412917615547e-05, "loss": 0.7892365455627441, "step": 7524 }, { "epoch": 9.233128834355828, "grad_norm": 0.23952259123325348, "learning_rate": 2.974920150940447e-05, "loss": 0.6618221998214722, "step": 7525 }, { "epoch": 9.23435582822086, "grad_norm": 0.280308336019516, "learning_rate": 2.9744273651226694e-05, "loss": 0.6016110181808472, "step": 7526 }, { "epoch": 9.23558282208589, "grad_norm": 0.23510447144508362, "learning_rate": 2.9739345601820783e-05, "loss": 0.7724440097808838, "step": 7527 }, { "epoch": 9.23680981595092, "grad_norm": 0.2890862822532654, "learning_rate": 2.9734417361385364e-05, "loss": 0.6336492896080017, "step": 7528 }, { "epoch": 9.23803680981595, "grad_norm": 0.24811342358589172, "learning_rate": 2.9729488930119088e-05, "loss": 0.697891116142273, "step": 7529 }, { "epoch": 9.239263803680982, "grad_norm": 0.29714998602867126, "learning_rate": 2.9724560308220606e-05, "loss": 0.4765312075614929, "step": 7530 }, { "epoch": 9.240490797546013, "grad_norm": 0.27987271547317505, "learning_rate": 2.971963149588857e-05, "loss": 0.6764036417007446, "step": 7531 }, { "epoch": 9.241717791411043, "grad_norm": 0.3114593029022217, "learning_rate": 2.9714702493321645e-05, "loss": 0.8696925640106201, "step": 7532 }, { "epoch": 9.242944785276073, "grad_norm": 0.23253180086612701, "learning_rate": 2.9709773300718513e-05, "loss": 0.8452754616737366, "step": 7533 }, { "epoch": 9.244171779141105, "grad_norm": 0.2512752413749695, "learning_rate": 2.9704843918277845e-05, "loss": 0.8402657508850098, "step": 7534 }, { "epoch": 9.245398773006135, "grad_norm": 0.2738986313343048, "learning_rate": 2.969991434619835e-05, "loss": 0.74590665102005, "step": 7535 }, { "epoch": 9.246625766871166, "grad_norm": 0.28611230850219727, "learning_rate": 2.9694984584678704e-05, "loss": 0.6006065607070923, "step": 7536 }, { "epoch": 9.247852760736196, "grad_norm": 0.2490602731704712, "learning_rate": 2.9690054633917624e-05, "loss": 0.9112371206283569, "step": 7537 }, { "epoch": 9.249079754601228, "grad_norm": 0.2506852447986603, "learning_rate": 2.9685124494113807e-05, "loss": 0.6238244771957397, "step": 7538 }, { "epoch": 9.250306748466258, "grad_norm": 0.2743850350379944, "learning_rate": 2.9680194165465997e-05, "loss": 0.6677420735359192, "step": 7539 }, { "epoch": 9.251533742331288, "grad_norm": 0.2993289530277252, "learning_rate": 2.96752636481729e-05, "loss": 0.6489118337631226, "step": 7540 }, { "epoch": 9.252760736196318, "grad_norm": 0.26375240087509155, "learning_rate": 2.9670332942433266e-05, "loss": 0.6200388669967651, "step": 7541 }, { "epoch": 9.25398773006135, "grad_norm": 0.22277775406837463, "learning_rate": 2.9665402048445827e-05, "loss": 0.7275295257568359, "step": 7542 }, { "epoch": 9.25521472392638, "grad_norm": 0.3698776364326477, "learning_rate": 2.966047096640934e-05, "loss": 0.6288135647773743, "step": 7543 }, { "epoch": 9.256441717791411, "grad_norm": 0.24469679594039917, "learning_rate": 2.965553969652256e-05, "loss": 0.6934951543807983, "step": 7544 }, { "epoch": 9.257668711656441, "grad_norm": 0.25791212916374207, "learning_rate": 2.9650608238984256e-05, "loss": 0.8094515800476074, "step": 7545 }, { "epoch": 9.258895705521473, "grad_norm": 0.3033061921596527, "learning_rate": 2.964567659399319e-05, "loss": 0.8204331994056702, "step": 7546 }, { "epoch": 9.260122699386503, "grad_norm": 0.3049168288707733, "learning_rate": 2.9640744761748157e-05, "loss": 0.7094741463661194, "step": 7547 }, { "epoch": 9.261349693251534, "grad_norm": 0.295782208442688, "learning_rate": 2.9635812742447937e-05, "loss": 0.584261417388916, "step": 7548 }, { "epoch": 9.262576687116564, "grad_norm": 0.2546907067298889, "learning_rate": 2.9630880536291328e-05, "loss": 0.7291712760925293, "step": 7549 }, { "epoch": 9.263803680981596, "grad_norm": 0.2994242310523987, "learning_rate": 2.9625948143477127e-05, "loss": 0.745712161064148, "step": 7550 }, { "epoch": 9.265030674846626, "grad_norm": 0.27104806900024414, "learning_rate": 2.9621015564204158e-05, "loss": 0.7484103441238403, "step": 7551 }, { "epoch": 9.266257668711656, "grad_norm": 0.2576417326927185, "learning_rate": 2.9616082798671225e-05, "loss": 0.5917141437530518, "step": 7552 }, { "epoch": 9.267484662576686, "grad_norm": 0.3459276854991913, "learning_rate": 2.9611149847077158e-05, "loss": 0.6502370834350586, "step": 7553 }, { "epoch": 9.268711656441718, "grad_norm": 0.2935909926891327, "learning_rate": 2.96062167096208e-05, "loss": 0.4744696617126465, "step": 7554 }, { "epoch": 9.269938650306749, "grad_norm": 0.2621995210647583, "learning_rate": 2.960128338650098e-05, "loss": 0.5062054395675659, "step": 7555 }, { "epoch": 9.271165644171779, "grad_norm": 0.21532848477363586, "learning_rate": 2.9596349877916552e-05, "loss": 0.6695267558097839, "step": 7556 }, { "epoch": 9.27239263803681, "grad_norm": 0.22677752375602722, "learning_rate": 2.959141618406636e-05, "loss": 0.7478839159011841, "step": 7557 }, { "epoch": 9.273619631901841, "grad_norm": 0.3106689751148224, "learning_rate": 2.9586482305149287e-05, "loss": 0.4104973077774048, "step": 7558 }, { "epoch": 9.274846625766871, "grad_norm": 0.3011329174041748, "learning_rate": 2.9581548241364192e-05, "loss": 0.5148928761482239, "step": 7559 }, { "epoch": 9.276073619631902, "grad_norm": 0.30812445282936096, "learning_rate": 2.957661399290995e-05, "loss": 0.5776512622833252, "step": 7560 }, { "epoch": 9.277300613496932, "grad_norm": 0.3822544813156128, "learning_rate": 2.9571679559985454e-05, "loss": 0.5507227778434753, "step": 7561 }, { "epoch": 9.278527607361964, "grad_norm": 0.3110431134700775, "learning_rate": 2.956674494278959e-05, "loss": 0.6118083000183105, "step": 7562 }, { "epoch": 9.279754601226994, "grad_norm": 0.2243228405714035, "learning_rate": 2.9561810141521268e-05, "loss": 0.7154310941696167, "step": 7563 }, { "epoch": 9.280981595092024, "grad_norm": 0.29243573546409607, "learning_rate": 2.955687515637939e-05, "loss": 0.5450518727302551, "step": 7564 }, { "epoch": 9.282208588957054, "grad_norm": 0.2916452884674072, "learning_rate": 2.9551939987562866e-05, "loss": 0.6195123195648193, "step": 7565 }, { "epoch": 9.283435582822086, "grad_norm": 0.23016665875911713, "learning_rate": 2.9547004635270625e-05, "loss": 0.7287464141845703, "step": 7566 }, { "epoch": 9.284662576687117, "grad_norm": 0.30683693289756775, "learning_rate": 2.9542069099701597e-05, "loss": 0.874770998954773, "step": 7567 }, { "epoch": 9.285889570552147, "grad_norm": 0.2471158355474472, "learning_rate": 2.953713338105472e-05, "loss": 0.7624001502990723, "step": 7568 }, { "epoch": 9.287116564417177, "grad_norm": 0.2632812559604645, "learning_rate": 2.9532197479528933e-05, "loss": 0.7892385125160217, "step": 7569 }, { "epoch": 9.28834355828221, "grad_norm": 0.2623588442802429, "learning_rate": 2.952726139532319e-05, "loss": 0.7142450213432312, "step": 7570 }, { "epoch": 9.28957055214724, "grad_norm": 0.23675383627414703, "learning_rate": 2.9522325128636463e-05, "loss": 0.9039809703826904, "step": 7571 }, { "epoch": 9.29079754601227, "grad_norm": 0.3168967664241791, "learning_rate": 2.951738867966769e-05, "loss": 0.5702145099639893, "step": 7572 }, { "epoch": 9.2920245398773, "grad_norm": 0.29491400718688965, "learning_rate": 2.951245204861588e-05, "loss": 0.5190699100494385, "step": 7573 }, { "epoch": 9.293251533742332, "grad_norm": 0.31679442524909973, "learning_rate": 2.9507515235679988e-05, "loss": 0.5779945850372314, "step": 7574 }, { "epoch": 9.294478527607362, "grad_norm": 0.31286925077438354, "learning_rate": 2.950257824105902e-05, "loss": 0.7780004739761353, "step": 7575 }, { "epoch": 9.295705521472392, "grad_norm": 0.24158817529678345, "learning_rate": 2.949764106495196e-05, "loss": 0.7699475288391113, "step": 7576 }, { "epoch": 9.296932515337422, "grad_norm": 0.26525795459747314, "learning_rate": 2.9492703707557817e-05, "loss": 0.7786884307861328, "step": 7577 }, { "epoch": 9.298159509202454, "grad_norm": 0.1943999081850052, "learning_rate": 2.9487766169075602e-05, "loss": 0.7812903523445129, "step": 7578 }, { "epoch": 9.299386503067485, "grad_norm": 0.25515589118003845, "learning_rate": 2.9482828449704332e-05, "loss": 0.6446987390518188, "step": 7579 }, { "epoch": 9.300613496932515, "grad_norm": 0.26208505034446716, "learning_rate": 2.9477890549643034e-05, "loss": 0.46512675285339355, "step": 7580 }, { "epoch": 9.301840490797545, "grad_norm": 0.3021966218948364, "learning_rate": 2.9472952469090742e-05, "loss": 0.497292160987854, "step": 7581 }, { "epoch": 9.303067484662577, "grad_norm": 0.2782938480377197, "learning_rate": 2.9468014208246487e-05, "loss": 0.7815859317779541, "step": 7582 }, { "epoch": 9.304294478527607, "grad_norm": 0.3623521029949188, "learning_rate": 2.9463075767309323e-05, "loss": 0.5451300144195557, "step": 7583 }, { "epoch": 9.305521472392638, "grad_norm": 0.30002954602241516, "learning_rate": 2.9458137146478305e-05, "loss": 0.5634669065475464, "step": 7584 }, { "epoch": 9.30674846625767, "grad_norm": 0.2819978892803192, "learning_rate": 2.9453198345952493e-05, "loss": 0.7734764218330383, "step": 7585 }, { "epoch": 9.3079754601227, "grad_norm": 0.30057621002197266, "learning_rate": 2.9448259365930957e-05, "loss": 0.6013099551200867, "step": 7586 }, { "epoch": 9.30920245398773, "grad_norm": 0.27270036935806274, "learning_rate": 2.9443320206612774e-05, "loss": 0.7158621549606323, "step": 7587 }, { "epoch": 9.31042944785276, "grad_norm": 0.3013925552368164, "learning_rate": 2.9438380868197025e-05, "loss": 0.5219025611877441, "step": 7588 }, { "epoch": 9.31165644171779, "grad_norm": 0.2129029631614685, "learning_rate": 2.9433441350882803e-05, "loss": 0.6910787224769592, "step": 7589 }, { "epoch": 9.312883435582823, "grad_norm": 0.2538963854312897, "learning_rate": 2.9428501654869206e-05, "loss": 0.7439724206924438, "step": 7590 }, { "epoch": 9.314110429447853, "grad_norm": 0.33819201588630676, "learning_rate": 2.9423561780355334e-05, "loss": 0.5373849272727966, "step": 7591 }, { "epoch": 9.315337423312883, "grad_norm": 0.3355047106742859, "learning_rate": 2.941862172754031e-05, "loss": 0.6407870650291443, "step": 7592 }, { "epoch": 9.316564417177915, "grad_norm": 0.278114914894104, "learning_rate": 2.941368149662324e-05, "loss": 0.7117588520050049, "step": 7593 }, { "epoch": 9.317791411042945, "grad_norm": 0.30452343821525574, "learning_rate": 2.940874108780325e-05, "loss": 0.5558203458786011, "step": 7594 }, { "epoch": 9.319018404907975, "grad_norm": 0.25958284735679626, "learning_rate": 2.9403800501279493e-05, "loss": 0.5293230414390564, "step": 7595 }, { "epoch": 9.320245398773006, "grad_norm": 0.2389700561761856, "learning_rate": 2.9398859737251096e-05, "loss": 0.7501893043518066, "step": 7596 }, { "epoch": 9.321472392638038, "grad_norm": 0.27047091722488403, "learning_rate": 2.939391879591721e-05, "loss": 0.7182233333587646, "step": 7597 }, { "epoch": 9.322699386503068, "grad_norm": 0.3367140591144562, "learning_rate": 2.938897767747698e-05, "loss": 0.5678456425666809, "step": 7598 }, { "epoch": 9.323926380368098, "grad_norm": 0.2627975642681122, "learning_rate": 2.9384036382129587e-05, "loss": 0.7588760852813721, "step": 7599 }, { "epoch": 9.325153374233128, "grad_norm": 0.2595604658126831, "learning_rate": 2.937909491007419e-05, "loss": 0.7375131845474243, "step": 7600 }, { "epoch": 9.32638036809816, "grad_norm": 0.3039802014827728, "learning_rate": 2.9374153261509963e-05, "loss": 0.7787014245986938, "step": 7601 }, { "epoch": 9.32760736196319, "grad_norm": 0.2780891954898834, "learning_rate": 2.9369211436636103e-05, "loss": 0.6783145070075989, "step": 7602 }, { "epoch": 9.32883435582822, "grad_norm": 0.3455308675765991, "learning_rate": 2.936426943565178e-05, "loss": 0.7685055732727051, "step": 7603 }, { "epoch": 9.330061349693251, "grad_norm": 0.2261156141757965, "learning_rate": 2.9359327258756205e-05, "loss": 0.703555703163147, "step": 7604 }, { "epoch": 9.331288343558283, "grad_norm": 0.24714402854442596, "learning_rate": 2.935438490614858e-05, "loss": 0.709152340888977, "step": 7605 }, { "epoch": 9.332515337423313, "grad_norm": 0.26559004187583923, "learning_rate": 2.934944237802812e-05, "loss": 0.6193615198135376, "step": 7606 }, { "epoch": 9.333742331288343, "grad_norm": 0.31316497921943665, "learning_rate": 2.934449967459405e-05, "loss": 0.7538319826126099, "step": 7607 }, { "epoch": 9.334969325153374, "grad_norm": 0.31836971640586853, "learning_rate": 2.933955679604558e-05, "loss": 0.6356053352355957, "step": 7608 }, { "epoch": 9.336196319018406, "grad_norm": 0.2822442948818207, "learning_rate": 2.933461374258196e-05, "loss": 0.7071318626403809, "step": 7609 }, { "epoch": 9.337423312883436, "grad_norm": 0.2619198262691498, "learning_rate": 2.9329670514402417e-05, "loss": 0.6290794014930725, "step": 7610 }, { "epoch": 9.338650306748466, "grad_norm": 0.2682925760746002, "learning_rate": 2.9324727111706208e-05, "loss": 0.7245240211486816, "step": 7611 }, { "epoch": 9.339877300613496, "grad_norm": 0.33462420105934143, "learning_rate": 2.931978353469258e-05, "loss": 0.5783142447471619, "step": 7612 }, { "epoch": 9.341104294478528, "grad_norm": 0.2655140161514282, "learning_rate": 2.93148397835608e-05, "loss": 0.6210974454879761, "step": 7613 }, { "epoch": 9.342331288343559, "grad_norm": 0.19763049483299255, "learning_rate": 2.930989585851014e-05, "loss": 0.6371555328369141, "step": 7614 }, { "epoch": 9.343558282208589, "grad_norm": 0.2959577143192291, "learning_rate": 2.9304951759739864e-05, "loss": 0.8194765448570251, "step": 7615 }, { "epoch": 9.344785276073619, "grad_norm": 0.3688822388648987, "learning_rate": 2.9300007487449253e-05, "loss": 0.5400056838989258, "step": 7616 }, { "epoch": 9.346012269938651, "grad_norm": 0.28485018014907837, "learning_rate": 2.9295063041837612e-05, "loss": 0.6375390291213989, "step": 7617 }, { "epoch": 9.347239263803681, "grad_norm": 0.2650867998600006, "learning_rate": 2.9290118423104228e-05, "loss": 0.5879194736480713, "step": 7618 }, { "epoch": 9.348466257668711, "grad_norm": 0.26513323187828064, "learning_rate": 2.9285173631448413e-05, "loss": 0.6801415681838989, "step": 7619 }, { "epoch": 9.349693251533742, "grad_norm": 0.27455487847328186, "learning_rate": 2.9280228667069466e-05, "loss": 0.7213339805603027, "step": 7620 }, { "epoch": 9.350920245398774, "grad_norm": 0.3093244135379791, "learning_rate": 2.9275283530166715e-05, "loss": 0.7093430757522583, "step": 7621 }, { "epoch": 9.352147239263804, "grad_norm": 0.2926122546195984, "learning_rate": 2.9270338220939468e-05, "loss": 0.6762562990188599, "step": 7622 }, { "epoch": 9.353374233128834, "grad_norm": 0.24337159097194672, "learning_rate": 2.9265392739587073e-05, "loss": 0.6908642649650574, "step": 7623 }, { "epoch": 9.354601226993864, "grad_norm": 0.304281622171402, "learning_rate": 2.9260447086308863e-05, "loss": 0.5641220211982727, "step": 7624 }, { "epoch": 9.355828220858896, "grad_norm": 0.28985288739204407, "learning_rate": 2.9255501261304176e-05, "loss": 0.7483515739440918, "step": 7625 }, { "epoch": 9.357055214723927, "grad_norm": 0.2988711893558502, "learning_rate": 2.9250555264772373e-05, "loss": 0.7125307321548462, "step": 7626 }, { "epoch": 9.358282208588957, "grad_norm": 0.2766329050064087, "learning_rate": 2.924560909691281e-05, "loss": 0.581213116645813, "step": 7627 }, { "epoch": 9.359509202453987, "grad_norm": 0.23094071447849274, "learning_rate": 2.9240662757924853e-05, "loss": 0.7484951019287109, "step": 7628 }, { "epoch": 9.360736196319019, "grad_norm": 0.2679159641265869, "learning_rate": 2.923571624800787e-05, "loss": 0.6263449192047119, "step": 7629 }, { "epoch": 9.36196319018405, "grad_norm": 0.31084123253822327, "learning_rate": 2.9230769567361255e-05, "loss": 0.550146222114563, "step": 7630 }, { "epoch": 9.36319018404908, "grad_norm": 0.3046770691871643, "learning_rate": 2.9225822716184383e-05, "loss": 0.587173342704773, "step": 7631 }, { "epoch": 9.36441717791411, "grad_norm": 0.3075810372829437, "learning_rate": 2.9220875694676648e-05, "loss": 0.638835072517395, "step": 7632 }, { "epoch": 9.365644171779142, "grad_norm": 0.26023805141448975, "learning_rate": 2.9215928503037447e-05, "loss": 0.6009775996208191, "step": 7633 }, { "epoch": 9.366871165644172, "grad_norm": 0.2556111216545105, "learning_rate": 2.9210981141466205e-05, "loss": 0.7369320392608643, "step": 7634 }, { "epoch": 9.368098159509202, "grad_norm": 0.3022545576095581, "learning_rate": 2.9206033610162313e-05, "loss": 0.5404362678527832, "step": 7635 }, { "epoch": 9.369325153374232, "grad_norm": 0.3063356280326843, "learning_rate": 2.920108590932521e-05, "loss": 0.5480006337165833, "step": 7636 }, { "epoch": 9.370552147239264, "grad_norm": 0.273703932762146, "learning_rate": 2.919613803915431e-05, "loss": 0.5901440382003784, "step": 7637 }, { "epoch": 9.371779141104295, "grad_norm": 0.26770690083503723, "learning_rate": 2.919118999984905e-05, "loss": 0.6754419207572937, "step": 7638 }, { "epoch": 9.373006134969325, "grad_norm": 0.3095749318599701, "learning_rate": 2.9186241791608875e-05, "loss": 0.7463579177856445, "step": 7639 }, { "epoch": 9.374233128834355, "grad_norm": 0.3055793344974518, "learning_rate": 2.9181293414633238e-05, "loss": 0.5323387384414673, "step": 7640 }, { "epoch": 9.375460122699387, "grad_norm": 0.2999752163887024, "learning_rate": 2.9176344869121592e-05, "loss": 0.7122260332107544, "step": 7641 }, { "epoch": 9.376687116564417, "grad_norm": 0.24306342005729675, "learning_rate": 2.917139615527339e-05, "loss": 0.8225845098495483, "step": 7642 }, { "epoch": 9.377914110429447, "grad_norm": 0.1883007287979126, "learning_rate": 2.916644727328811e-05, "loss": 0.7461932897567749, "step": 7643 }, { "epoch": 9.379141104294478, "grad_norm": 0.3219206631183624, "learning_rate": 2.916149822336522e-05, "loss": 0.7046467065811157, "step": 7644 }, { "epoch": 9.38036809815951, "grad_norm": 0.25098755955696106, "learning_rate": 2.9156549005704203e-05, "loss": 0.7297254800796509, "step": 7645 }, { "epoch": 9.38159509202454, "grad_norm": 0.28049036860466003, "learning_rate": 2.915159962050456e-05, "loss": 0.632888674736023, "step": 7646 }, { "epoch": 9.38282208588957, "grad_norm": 0.32362422347068787, "learning_rate": 2.914665006796577e-05, "loss": 0.6583957672119141, "step": 7647 }, { "epoch": 9.3840490797546, "grad_norm": 0.24716328084468842, "learning_rate": 2.914170034828735e-05, "loss": 0.7991303205490112, "step": 7648 }, { "epoch": 9.385276073619632, "grad_norm": 0.2720334231853485, "learning_rate": 2.9136750461668788e-05, "loss": 0.7158364057540894, "step": 7649 }, { "epoch": 9.386503067484663, "grad_norm": 0.32870420813560486, "learning_rate": 2.9131800408309625e-05, "loss": 0.42654505372047424, "step": 7650 }, { "epoch": 9.387730061349693, "grad_norm": 0.23070114850997925, "learning_rate": 2.912685018840936e-05, "loss": 0.7424616813659668, "step": 7651 }, { "epoch": 9.388957055214725, "grad_norm": 0.2688361406326294, "learning_rate": 2.912189980216754e-05, "loss": 0.7224241495132446, "step": 7652 }, { "epoch": 9.390184049079755, "grad_norm": 0.26839974522590637, "learning_rate": 2.9116949249783697e-05, "loss": 0.5487262010574341, "step": 7653 }, { "epoch": 9.391411042944785, "grad_norm": 0.2615005671977997, "learning_rate": 2.9111998531457375e-05, "loss": 0.6823135614395142, "step": 7654 }, { "epoch": 9.392638036809815, "grad_norm": 0.24274374544620514, "learning_rate": 2.9107047647388118e-05, "loss": 0.6931561827659607, "step": 7655 }, { "epoch": 9.393865030674847, "grad_norm": 0.3377716839313507, "learning_rate": 2.910209659777548e-05, "loss": 0.5086082816123962, "step": 7656 }, { "epoch": 9.395092024539878, "grad_norm": 0.2566188871860504, "learning_rate": 2.9097145382819023e-05, "loss": 0.833810567855835, "step": 7657 }, { "epoch": 9.396319018404908, "grad_norm": 0.30575892329216003, "learning_rate": 2.9092194002718338e-05, "loss": 0.568500280380249, "step": 7658 }, { "epoch": 9.397546012269938, "grad_norm": 0.281525582075119, "learning_rate": 2.908724245767297e-05, "loss": 0.6805771589279175, "step": 7659 }, { "epoch": 9.39877300613497, "grad_norm": 0.24035851657390594, "learning_rate": 2.9082290747882523e-05, "loss": 0.7017383575439453, "step": 7660 }, { "epoch": 9.4, "grad_norm": 0.3229360580444336, "learning_rate": 2.9077338873546567e-05, "loss": 0.5418328642845154, "step": 7661 }, { "epoch": 9.40122699386503, "grad_norm": 0.3613187074661255, "learning_rate": 2.9072386834864724e-05, "loss": 0.5753331184387207, "step": 7662 }, { "epoch": 9.40245398773006, "grad_norm": 0.22513870894908905, "learning_rate": 2.9067434632036573e-05, "loss": 0.7921907901763916, "step": 7663 }, { "epoch": 9.403680981595093, "grad_norm": 0.208978533744812, "learning_rate": 2.9062482265261736e-05, "loss": 0.7978571653366089, "step": 7664 }, { "epoch": 9.404907975460123, "grad_norm": 0.2901153862476349, "learning_rate": 2.905752973473983e-05, "loss": 0.6175944209098816, "step": 7665 }, { "epoch": 9.406134969325153, "grad_norm": 0.2950546145439148, "learning_rate": 2.9052577040670464e-05, "loss": 0.7160065770149231, "step": 7666 }, { "epoch": 9.407361963190183, "grad_norm": 0.241423562169075, "learning_rate": 2.9047624183253286e-05, "loss": 0.6565537452697754, "step": 7667 }, { "epoch": 9.408588957055215, "grad_norm": 0.3059052526950836, "learning_rate": 2.9042671162687914e-05, "loss": 0.4605330228805542, "step": 7668 }, { "epoch": 9.409815950920246, "grad_norm": 0.37496164441108704, "learning_rate": 2.9037717979173995e-05, "loss": 0.42459601163864136, "step": 7669 }, { "epoch": 9.411042944785276, "grad_norm": 0.335631400346756, "learning_rate": 2.9032764632911193e-05, "loss": 0.5862228870391846, "step": 7670 }, { "epoch": 9.412269938650306, "grad_norm": 0.27733880281448364, "learning_rate": 2.9027811124099136e-05, "loss": 0.5047770142555237, "step": 7671 }, { "epoch": 9.413496932515338, "grad_norm": 0.32914718985557556, "learning_rate": 2.9022857452937508e-05, "loss": 0.5363835692405701, "step": 7672 }, { "epoch": 9.414723926380368, "grad_norm": 0.27652594447135925, "learning_rate": 2.9017903619625963e-05, "loss": 0.7212443947792053, "step": 7673 }, { "epoch": 9.415950920245399, "grad_norm": 0.2238384485244751, "learning_rate": 2.9012949624364193e-05, "loss": 0.8623676300048828, "step": 7674 }, { "epoch": 9.417177914110429, "grad_norm": 0.22755368053913116, "learning_rate": 2.9007995467351862e-05, "loss": 0.742605447769165, "step": 7675 }, { "epoch": 9.41840490797546, "grad_norm": 0.26856526732444763, "learning_rate": 2.900304114878866e-05, "loss": 0.5863962769508362, "step": 7676 }, { "epoch": 9.419631901840491, "grad_norm": 0.24078063666820526, "learning_rate": 2.8998086668874303e-05, "loss": 0.5735254287719727, "step": 7677 }, { "epoch": 9.420858895705521, "grad_norm": 0.2694458067417145, "learning_rate": 2.899313202780846e-05, "loss": 0.5539449453353882, "step": 7678 }, { "epoch": 9.422085889570551, "grad_norm": 0.22898229956626892, "learning_rate": 2.8988177225790857e-05, "loss": 0.9313079118728638, "step": 7679 }, { "epoch": 9.423312883435583, "grad_norm": 0.3379346430301666, "learning_rate": 2.898322226302121e-05, "loss": 0.7933882474899292, "step": 7680 }, { "epoch": 9.424539877300614, "grad_norm": 0.28693684935569763, "learning_rate": 2.897826713969923e-05, "loss": 0.6129443049430847, "step": 7681 }, { "epoch": 9.425766871165644, "grad_norm": 0.26995259523391724, "learning_rate": 2.897331185602465e-05, "loss": 0.7066574692726135, "step": 7682 }, { "epoch": 9.426993865030674, "grad_norm": 0.31421273946762085, "learning_rate": 2.8968356412197195e-05, "loss": 0.691098153591156, "step": 7683 }, { "epoch": 9.428220858895706, "grad_norm": 0.2595633566379547, "learning_rate": 2.8963400808416618e-05, "loss": 0.6915587186813354, "step": 7684 }, { "epoch": 9.429447852760736, "grad_norm": 0.22118651866912842, "learning_rate": 2.8958445044882653e-05, "loss": 0.7110843062400818, "step": 7685 }, { "epoch": 9.430674846625767, "grad_norm": 0.2416864037513733, "learning_rate": 2.8953489121795057e-05, "loss": 0.7677040100097656, "step": 7686 }, { "epoch": 9.431901840490797, "grad_norm": 0.3288348615169525, "learning_rate": 2.8948533039353597e-05, "loss": 0.6083076000213623, "step": 7687 }, { "epoch": 9.433128834355829, "grad_norm": 0.3199387192726135, "learning_rate": 2.894357679775803e-05, "loss": 0.649189829826355, "step": 7688 }, { "epoch": 9.434355828220859, "grad_norm": 0.3174377381801605, "learning_rate": 2.8938620397208134e-05, "loss": 0.6359872221946716, "step": 7689 }, { "epoch": 9.43558282208589, "grad_norm": 0.243637815117836, "learning_rate": 2.8933663837903673e-05, "loss": 0.8739901781082153, "step": 7690 }, { "epoch": 9.43680981595092, "grad_norm": 0.2741929590702057, "learning_rate": 2.8928707120044446e-05, "loss": 0.569930911064148, "step": 7691 }, { "epoch": 9.438036809815952, "grad_norm": 0.3002377152442932, "learning_rate": 2.8923750243830244e-05, "loss": 0.5389164090156555, "step": 7692 }, { "epoch": 9.439263803680982, "grad_norm": 0.29285165667533875, "learning_rate": 2.891879320946086e-05, "loss": 0.7275223135948181, "step": 7693 }, { "epoch": 9.440490797546012, "grad_norm": 0.315064400434494, "learning_rate": 2.8913836017136093e-05, "loss": 0.730258584022522, "step": 7694 }, { "epoch": 9.441717791411042, "grad_norm": 0.2785780727863312, "learning_rate": 2.890887866705576e-05, "loss": 0.836134672164917, "step": 7695 }, { "epoch": 9.442944785276074, "grad_norm": 0.34989944100379944, "learning_rate": 2.8903921159419678e-05, "loss": 0.5208485126495361, "step": 7696 }, { "epoch": 9.444171779141104, "grad_norm": 0.26490435004234314, "learning_rate": 2.8898963494427667e-05, "loss": 0.5792471170425415, "step": 7697 }, { "epoch": 9.445398773006135, "grad_norm": 0.2817423641681671, "learning_rate": 2.8894005672279562e-05, "loss": 0.729628324508667, "step": 7698 }, { "epoch": 9.446625766871165, "grad_norm": 0.2549343407154083, "learning_rate": 2.8889047693175192e-05, "loss": 0.564329206943512, "step": 7699 }, { "epoch": 9.447852760736197, "grad_norm": 0.24066703021526337, "learning_rate": 2.8884089557314398e-05, "loss": 0.5512387156486511, "step": 7700 }, { "epoch": 9.449079754601227, "grad_norm": 0.22155208885669708, "learning_rate": 2.8879131264897043e-05, "loss": 0.7846909761428833, "step": 7701 }, { "epoch": 9.450306748466257, "grad_norm": 0.29719316959381104, "learning_rate": 2.887417281612296e-05, "loss": 0.773771345615387, "step": 7702 }, { "epoch": 9.451533742331288, "grad_norm": 0.31014177203178406, "learning_rate": 2.886921421119202e-05, "loss": 0.6484475135803223, "step": 7703 }, { "epoch": 9.45276073619632, "grad_norm": 0.3002045154571533, "learning_rate": 2.8864255450304095e-05, "loss": 0.787044882774353, "step": 7704 }, { "epoch": 9.45398773006135, "grad_norm": 0.325637549161911, "learning_rate": 2.885929653365904e-05, "loss": 0.5271260738372803, "step": 7705 }, { "epoch": 9.45521472392638, "grad_norm": 0.27626723051071167, "learning_rate": 2.885433746145677e-05, "loss": 0.760021984577179, "step": 7706 }, { "epoch": 9.45644171779141, "grad_norm": 0.28333818912506104, "learning_rate": 2.884937823389714e-05, "loss": 0.62485671043396, "step": 7707 }, { "epoch": 9.457668711656442, "grad_norm": 0.3367787301540375, "learning_rate": 2.8844418851180055e-05, "loss": 0.5482277870178223, "step": 7708 }, { "epoch": 9.458895705521472, "grad_norm": 0.22952724993228912, "learning_rate": 2.883945931350541e-05, "loss": 0.667313814163208, "step": 7709 }, { "epoch": 9.460122699386503, "grad_norm": 0.22778871655464172, "learning_rate": 2.8834499621073102e-05, "loss": 0.6951790452003479, "step": 7710 }, { "epoch": 9.461349693251535, "grad_norm": 0.32097434997558594, "learning_rate": 2.8829539774083057e-05, "loss": 0.6765163540840149, "step": 7711 }, { "epoch": 9.462576687116565, "grad_norm": 0.26854798197746277, "learning_rate": 2.882457977273519e-05, "loss": 0.5769863724708557, "step": 7712 }, { "epoch": 9.463803680981595, "grad_norm": 0.27192291617393494, "learning_rate": 2.8819619617229422e-05, "loss": 0.7026161551475525, "step": 7713 }, { "epoch": 9.465030674846625, "grad_norm": 0.282794326543808, "learning_rate": 2.8814659307765672e-05, "loss": 0.5997666716575623, "step": 7714 }, { "epoch": 9.466257668711656, "grad_norm": 0.2658664286136627, "learning_rate": 2.8809698844543888e-05, "loss": 0.6016083359718323, "step": 7715 }, { "epoch": 9.467484662576688, "grad_norm": 0.3687658905982971, "learning_rate": 2.8804738227764005e-05, "loss": 0.4603363871574402, "step": 7716 }, { "epoch": 9.468711656441718, "grad_norm": 0.2704998254776001, "learning_rate": 2.879977745762598e-05, "loss": 0.7485921382904053, "step": 7717 }, { "epoch": 9.469938650306748, "grad_norm": 0.280553936958313, "learning_rate": 2.8794816534329767e-05, "loss": 0.5781089663505554, "step": 7718 }, { "epoch": 9.47116564417178, "grad_norm": 0.23983073234558105, "learning_rate": 2.878985545807532e-05, "loss": 0.6577268242835999, "step": 7719 }, { "epoch": 9.47239263803681, "grad_norm": 0.3242053687572479, "learning_rate": 2.8784894229062608e-05, "loss": 0.5243592262268066, "step": 7720 }, { "epoch": 9.47361963190184, "grad_norm": 0.2949995994567871, "learning_rate": 2.87799328474916e-05, "loss": 0.7802331447601318, "step": 7721 }, { "epoch": 9.47484662576687, "grad_norm": 0.311866819858551, "learning_rate": 2.877497131356229e-05, "loss": 0.46196845173835754, "step": 7722 }, { "epoch": 9.476073619631903, "grad_norm": 0.3288939595222473, "learning_rate": 2.8770009627474647e-05, "loss": 0.513586699962616, "step": 7723 }, { "epoch": 9.477300613496933, "grad_norm": 0.2797496020793915, "learning_rate": 2.8765047789428668e-05, "loss": 0.6566105484962463, "step": 7724 }, { "epoch": 9.478527607361963, "grad_norm": 0.29507726430892944, "learning_rate": 2.8760085799624352e-05, "loss": 0.773886501789093, "step": 7725 }, { "epoch": 9.479754601226993, "grad_norm": 0.23499152064323425, "learning_rate": 2.8755123658261702e-05, "loss": 0.6959140300750732, "step": 7726 }, { "epoch": 9.480981595092025, "grad_norm": 0.25621265172958374, "learning_rate": 2.8750161365540722e-05, "loss": 0.8299967050552368, "step": 7727 }, { "epoch": 9.482208588957056, "grad_norm": 0.3802073001861572, "learning_rate": 2.874519892166143e-05, "loss": 0.741061806678772, "step": 7728 }, { "epoch": 9.483435582822086, "grad_norm": 0.26348599791526794, "learning_rate": 2.874023632682386e-05, "loss": 0.6527708768844604, "step": 7729 }, { "epoch": 9.484662576687116, "grad_norm": 0.3405363857746124, "learning_rate": 2.8735273581228034e-05, "loss": 0.4164811074733734, "step": 7730 }, { "epoch": 9.485889570552148, "grad_norm": 0.37025436758995056, "learning_rate": 2.8730310685073975e-05, "loss": 0.5182552337646484, "step": 7731 }, { "epoch": 9.487116564417178, "grad_norm": 0.2763766050338745, "learning_rate": 2.8725347638561733e-05, "loss": 0.5891899466514587, "step": 7732 }, { "epoch": 9.488343558282208, "grad_norm": 0.31193822622299194, "learning_rate": 2.8720384441891356e-05, "loss": 0.7284562587738037, "step": 7733 }, { "epoch": 9.489570552147239, "grad_norm": 0.3047805428504944, "learning_rate": 2.8715421095262896e-05, "loss": 0.5538984537124634, "step": 7734 }, { "epoch": 9.49079754601227, "grad_norm": 0.2917667329311371, "learning_rate": 2.8710457598876405e-05, "loss": 0.7079394459724426, "step": 7735 }, { "epoch": 9.4920245398773, "grad_norm": 0.25605860352516174, "learning_rate": 2.870549395293195e-05, "loss": 0.6988756656646729, "step": 7736 }, { "epoch": 9.493251533742331, "grad_norm": 0.27172207832336426, "learning_rate": 2.8700530157629595e-05, "loss": 0.5996915102005005, "step": 7737 }, { "epoch": 9.494478527607361, "grad_norm": 0.366493284702301, "learning_rate": 2.8695566213169435e-05, "loss": 0.3738868534564972, "step": 7738 }, { "epoch": 9.495705521472393, "grad_norm": 0.23458078503608704, "learning_rate": 2.869060211975153e-05, "loss": 0.738736093044281, "step": 7739 }, { "epoch": 9.496932515337424, "grad_norm": 0.22110481560230255, "learning_rate": 2.868563787757599e-05, "loss": 0.685813307762146, "step": 7740 }, { "epoch": 9.498159509202454, "grad_norm": 0.31754812598228455, "learning_rate": 2.8680673486842897e-05, "loss": 0.8077412843704224, "step": 7741 }, { "epoch": 9.499386503067484, "grad_norm": 0.302810937166214, "learning_rate": 2.867570894775235e-05, "loss": 0.6940024495124817, "step": 7742 }, { "epoch": 9.500613496932516, "grad_norm": 0.2405071258544922, "learning_rate": 2.867074426050446e-05, "loss": 0.747819185256958, "step": 7743 }, { "epoch": 9.501840490797546, "grad_norm": 0.2887146472930908, "learning_rate": 2.8665779425299337e-05, "loss": 0.7895393371582031, "step": 7744 }, { "epoch": 9.503067484662576, "grad_norm": 0.2979431450366974, "learning_rate": 2.8660814442337102e-05, "loss": 0.58123379945755, "step": 7745 }, { "epoch": 9.504294478527607, "grad_norm": 0.28635674715042114, "learning_rate": 2.8655849311817878e-05, "loss": 0.7656285166740417, "step": 7746 }, { "epoch": 9.505521472392639, "grad_norm": 0.46256422996520996, "learning_rate": 2.86508840339418e-05, "loss": 0.4245034456253052, "step": 7747 }, { "epoch": 9.506748466257669, "grad_norm": 0.24514202773571014, "learning_rate": 2.864591860890899e-05, "loss": 0.7260317802429199, "step": 7748 }, { "epoch": 9.5079754601227, "grad_norm": 0.299085408449173, "learning_rate": 2.8640953036919605e-05, "loss": 0.5546058416366577, "step": 7749 }, { "epoch": 9.50920245398773, "grad_norm": 0.2714874744415283, "learning_rate": 2.863598731817378e-05, "loss": 0.7070049047470093, "step": 7750 }, { "epoch": 9.510429447852761, "grad_norm": 0.2834467589855194, "learning_rate": 2.8631021452871683e-05, "loss": 0.4507148861885071, "step": 7751 }, { "epoch": 9.511656441717792, "grad_norm": 0.269786536693573, "learning_rate": 2.8626055441213466e-05, "loss": 0.5782781839370728, "step": 7752 }, { "epoch": 9.512883435582822, "grad_norm": 0.26239433884620667, "learning_rate": 2.8621089283399294e-05, "loss": 0.7390488982200623, "step": 7753 }, { "epoch": 9.514110429447852, "grad_norm": 0.249314546585083, "learning_rate": 2.861612297962935e-05, "loss": 0.7523931264877319, "step": 7754 }, { "epoch": 9.515337423312884, "grad_norm": 0.2657535970211029, "learning_rate": 2.8611156530103787e-05, "loss": 0.5909262895584106, "step": 7755 }, { "epoch": 9.516564417177914, "grad_norm": 0.2879827916622162, "learning_rate": 2.8606189935022815e-05, "loss": 0.7095016241073608, "step": 7756 }, { "epoch": 9.517791411042944, "grad_norm": 0.29857516288757324, "learning_rate": 2.8601223194586612e-05, "loss": 0.5950403213500977, "step": 7757 }, { "epoch": 9.519018404907975, "grad_norm": 0.2772964835166931, "learning_rate": 2.859625630899537e-05, "loss": 0.6737619638442993, "step": 7758 }, { "epoch": 9.520245398773007, "grad_norm": 0.2783550024032593, "learning_rate": 2.8591289278449296e-05, "loss": 0.5089750289916992, "step": 7759 }, { "epoch": 9.521472392638037, "grad_norm": 0.27728620171546936, "learning_rate": 2.8586322103148587e-05, "loss": 0.6558085083961487, "step": 7760 }, { "epoch": 9.522699386503067, "grad_norm": 0.3247080445289612, "learning_rate": 2.8581354783293462e-05, "loss": 0.653876543045044, "step": 7761 }, { "epoch": 9.523926380368097, "grad_norm": 0.23749344050884247, "learning_rate": 2.8576387319084143e-05, "loss": 0.8520736694335938, "step": 7762 }, { "epoch": 9.52515337423313, "grad_norm": 0.2731243968009949, "learning_rate": 2.8571419710720847e-05, "loss": 0.6351248025894165, "step": 7763 }, { "epoch": 9.52638036809816, "grad_norm": 0.30681943893432617, "learning_rate": 2.856645195840382e-05, "loss": 0.5778928399085999, "step": 7764 }, { "epoch": 9.52760736196319, "grad_norm": 0.3184064030647278, "learning_rate": 2.8561484062333284e-05, "loss": 0.6073507070541382, "step": 7765 }, { "epoch": 9.52883435582822, "grad_norm": 0.3001883327960968, "learning_rate": 2.855651602270948e-05, "loss": 0.6008601188659668, "step": 7766 }, { "epoch": 9.530061349693252, "grad_norm": 0.25847575068473816, "learning_rate": 2.8551547839732656e-05, "loss": 0.684059202671051, "step": 7767 }, { "epoch": 9.531288343558282, "grad_norm": 0.2746416926383972, "learning_rate": 2.8546579513603067e-05, "loss": 0.6181982755661011, "step": 7768 }, { "epoch": 9.532515337423312, "grad_norm": 0.22500644624233246, "learning_rate": 2.8541611044520978e-05, "loss": 0.6916646957397461, "step": 7769 }, { "epoch": 9.533742331288344, "grad_norm": 0.27358758449554443, "learning_rate": 2.8536642432686644e-05, "loss": 0.7108349204063416, "step": 7770 }, { "epoch": 9.534969325153375, "grad_norm": 0.2762497663497925, "learning_rate": 2.8531673678300348e-05, "loss": 0.5884381532669067, "step": 7771 }, { "epoch": 9.536196319018405, "grad_norm": 0.316488116979599, "learning_rate": 2.8526704781562345e-05, "loss": 0.5190965533256531, "step": 7772 }, { "epoch": 9.537423312883435, "grad_norm": 0.26317206025123596, "learning_rate": 2.8521735742672946e-05, "loss": 0.6400934457778931, "step": 7773 }, { "epoch": 9.538650306748465, "grad_norm": 0.2680467367172241, "learning_rate": 2.8516766561832415e-05, "loss": 0.6808182001113892, "step": 7774 }, { "epoch": 9.539877300613497, "grad_norm": 0.23936372995376587, "learning_rate": 2.8511797239241056e-05, "loss": 0.7900725603103638, "step": 7775 }, { "epoch": 9.541104294478528, "grad_norm": 0.2426244169473648, "learning_rate": 2.8506827775099166e-05, "loss": 0.8219429850578308, "step": 7776 }, { "epoch": 9.542331288343558, "grad_norm": 0.2763349413871765, "learning_rate": 2.850185816960705e-05, "loss": 0.7306289672851562, "step": 7777 }, { "epoch": 9.54355828220859, "grad_norm": 0.37171393632888794, "learning_rate": 2.8496888422965023e-05, "loss": 0.6450950503349304, "step": 7778 }, { "epoch": 9.54478527607362, "grad_norm": 0.26893043518066406, "learning_rate": 2.849191853537339e-05, "loss": 0.6837836503982544, "step": 7779 }, { "epoch": 9.54601226993865, "grad_norm": 0.2942344546318054, "learning_rate": 2.8486948507032484e-05, "loss": 0.5667972564697266, "step": 7780 }, { "epoch": 9.54723926380368, "grad_norm": 0.26330000162124634, "learning_rate": 2.848197833814263e-05, "loss": 0.6130175590515137, "step": 7781 }, { "epoch": 9.548466257668712, "grad_norm": 0.27432844042778015, "learning_rate": 2.8477008028904156e-05, "loss": 0.5796259641647339, "step": 7782 }, { "epoch": 9.549693251533743, "grad_norm": 0.23389334976673126, "learning_rate": 2.8472037579517407e-05, "loss": 0.8574175238609314, "step": 7783 }, { "epoch": 9.550920245398773, "grad_norm": 0.3421989679336548, "learning_rate": 2.8467066990182722e-05, "loss": 0.5683927536010742, "step": 7784 }, { "epoch": 9.552147239263803, "grad_norm": 0.25724127888679504, "learning_rate": 2.8462096261100456e-05, "loss": 0.6762300133705139, "step": 7785 }, { "epoch": 9.553374233128835, "grad_norm": 0.253004789352417, "learning_rate": 2.8457125392470972e-05, "loss": 0.8098490238189697, "step": 7786 }, { "epoch": 9.554601226993865, "grad_norm": 0.2420530915260315, "learning_rate": 2.8452154384494617e-05, "loss": 0.5301437377929688, "step": 7787 }, { "epoch": 9.555828220858896, "grad_norm": 0.22741201519966125, "learning_rate": 2.8447183237371772e-05, "loss": 0.5846569538116455, "step": 7788 }, { "epoch": 9.557055214723926, "grad_norm": 0.2175518274307251, "learning_rate": 2.8442211951302795e-05, "loss": 0.8920102119445801, "step": 7789 }, { "epoch": 9.558282208588958, "grad_norm": 0.29322490096092224, "learning_rate": 2.843724052648807e-05, "loss": 0.6672259569168091, "step": 7790 }, { "epoch": 9.559509202453988, "grad_norm": 0.2546500563621521, "learning_rate": 2.843226896312799e-05, "loss": 0.7544439435005188, "step": 7791 }, { "epoch": 9.560736196319018, "grad_norm": 0.26088109612464905, "learning_rate": 2.842729726142293e-05, "loss": 0.8138675093650818, "step": 7792 }, { "epoch": 9.561963190184048, "grad_norm": 0.3119554817676544, "learning_rate": 2.8422325421573298e-05, "loss": 0.6427220702171326, "step": 7793 }, { "epoch": 9.56319018404908, "grad_norm": 0.32871517539024353, "learning_rate": 2.8417353443779482e-05, "loss": 0.49532851576805115, "step": 7794 }, { "epoch": 9.56441717791411, "grad_norm": 0.2898598909378052, "learning_rate": 2.84123813282419e-05, "loss": 0.738321840763092, "step": 7795 }, { "epoch": 9.565644171779141, "grad_norm": 0.25746139883995056, "learning_rate": 2.8407409075160963e-05, "loss": 0.6017850637435913, "step": 7796 }, { "epoch": 9.566871165644171, "grad_norm": 0.2820344865322113, "learning_rate": 2.8402436684737077e-05, "loss": 0.4795898199081421, "step": 7797 }, { "epoch": 9.568098159509203, "grad_norm": 0.30054154992103577, "learning_rate": 2.839746415717068e-05, "loss": 0.5882311463356018, "step": 7798 }, { "epoch": 9.569325153374233, "grad_norm": 0.3010653853416443, "learning_rate": 2.8392491492662184e-05, "loss": 0.5017696619033813, "step": 7799 }, { "epoch": 9.570552147239264, "grad_norm": 0.35248714685440063, "learning_rate": 2.838751869141204e-05, "loss": 0.6061893701553345, "step": 7800 }, { "epoch": 9.571779141104294, "grad_norm": 0.22611573338508606, "learning_rate": 2.838254575362067e-05, "loss": 0.8140952587127686, "step": 7801 }, { "epoch": 9.573006134969326, "grad_norm": 0.2978055775165558, "learning_rate": 2.8377572679488534e-05, "loss": 0.6315868496894836, "step": 7802 }, { "epoch": 9.574233128834356, "grad_norm": 0.24715794622898102, "learning_rate": 2.8372599469216077e-05, "loss": 0.6549628973007202, "step": 7803 }, { "epoch": 9.575460122699386, "grad_norm": 0.24676735699176788, "learning_rate": 2.8367626123003753e-05, "loss": 0.5868755578994751, "step": 7804 }, { "epoch": 9.576687116564417, "grad_norm": 0.29629677534103394, "learning_rate": 2.8362652641052025e-05, "loss": 0.4735543727874756, "step": 7805 }, { "epoch": 9.577914110429449, "grad_norm": 0.33516618609428406, "learning_rate": 2.8357679023561357e-05, "loss": 0.567150354385376, "step": 7806 }, { "epoch": 9.579141104294479, "grad_norm": 0.5926488041877747, "learning_rate": 2.8352705270732226e-05, "loss": 0.6177017688751221, "step": 7807 }, { "epoch": 9.580368098159509, "grad_norm": 0.27868083119392395, "learning_rate": 2.8347731382765106e-05, "loss": 0.7660199403762817, "step": 7808 }, { "epoch": 9.58159509202454, "grad_norm": 0.26855674386024475, "learning_rate": 2.8342757359860482e-05, "loss": 0.6926262974739075, "step": 7809 }, { "epoch": 9.582822085889571, "grad_norm": 0.2765035033226013, "learning_rate": 2.833778320221885e-05, "loss": 0.6404705047607422, "step": 7810 }, { "epoch": 9.584049079754601, "grad_norm": 0.3098289668560028, "learning_rate": 2.833280891004069e-05, "loss": 0.6924275159835815, "step": 7811 }, { "epoch": 9.585276073619632, "grad_norm": 0.35635480284690857, "learning_rate": 2.832783448352651e-05, "loss": 0.5055317282676697, "step": 7812 }, { "epoch": 9.586503067484662, "grad_norm": 0.27738335728645325, "learning_rate": 2.832285992287681e-05, "loss": 0.6516152620315552, "step": 7813 }, { "epoch": 9.587730061349694, "grad_norm": 0.2958345115184784, "learning_rate": 2.8317885228292108e-05, "loss": 0.6421724557876587, "step": 7814 }, { "epoch": 9.588957055214724, "grad_norm": 0.30424386262893677, "learning_rate": 2.8312910399972914e-05, "loss": 0.6349758505821228, "step": 7815 }, { "epoch": 9.590184049079754, "grad_norm": 0.2773328125476837, "learning_rate": 2.830793543811974e-05, "loss": 0.5890205502510071, "step": 7816 }, { "epoch": 9.591411042944785, "grad_norm": 0.3003411293029785, "learning_rate": 2.8302960342933144e-05, "loss": 0.7593016028404236, "step": 7817 }, { "epoch": 9.592638036809817, "grad_norm": 0.3099750280380249, "learning_rate": 2.8297985114613622e-05, "loss": 0.8883274793624878, "step": 7818 }, { "epoch": 9.593865030674847, "grad_norm": 0.2607026696205139, "learning_rate": 2.829300975336173e-05, "loss": 0.6957579851150513, "step": 7819 }, { "epoch": 9.595092024539877, "grad_norm": 0.27000540494918823, "learning_rate": 2.828803425937801e-05, "loss": 0.7284729480743408, "step": 7820 }, { "epoch": 9.596319018404907, "grad_norm": 0.2405872941017151, "learning_rate": 2.8283058632863003e-05, "loss": 0.47185230255126953, "step": 7821 }, { "epoch": 9.59754601226994, "grad_norm": 0.3128665089607239, "learning_rate": 2.827808287401727e-05, "loss": 0.4736669659614563, "step": 7822 }, { "epoch": 9.59877300613497, "grad_norm": 0.22325479984283447, "learning_rate": 2.8273106983041358e-05, "loss": 0.6240897178649902, "step": 7823 }, { "epoch": 9.6, "grad_norm": 0.2489335685968399, "learning_rate": 2.8268130960135847e-05, "loss": 0.6266063451766968, "step": 7824 }, { "epoch": 9.60122699386503, "grad_norm": 0.30797484517097473, "learning_rate": 2.8263154805501297e-05, "loss": 0.7570163011550903, "step": 7825 }, { "epoch": 9.602453987730062, "grad_norm": 0.3425353467464447, "learning_rate": 2.8258178519338275e-05, "loss": 0.616276741027832, "step": 7826 }, { "epoch": 9.603680981595092, "grad_norm": 0.30252134799957275, "learning_rate": 2.8253202101847375e-05, "loss": 0.5446444153785706, "step": 7827 }, { "epoch": 9.604907975460122, "grad_norm": 0.27008959650993347, "learning_rate": 2.8248225553229175e-05, "loss": 0.8242246508598328, "step": 7828 }, { "epoch": 9.606134969325154, "grad_norm": 0.2572467625141144, "learning_rate": 2.8243248873684275e-05, "loss": 0.8905549049377441, "step": 7829 }, { "epoch": 9.607361963190185, "grad_norm": 0.27945831418037415, "learning_rate": 2.823827206341325e-05, "loss": 0.7537518739700317, "step": 7830 }, { "epoch": 9.608588957055215, "grad_norm": 0.2886473536491394, "learning_rate": 2.8233295122616733e-05, "loss": 0.6069766879081726, "step": 7831 }, { "epoch": 9.609815950920245, "grad_norm": 0.2678704857826233, "learning_rate": 2.8228318051495295e-05, "loss": 0.7098839282989502, "step": 7832 }, { "epoch": 9.611042944785275, "grad_norm": 0.23532909154891968, "learning_rate": 2.8223340850249562e-05, "loss": 0.8606257438659668, "step": 7833 }, { "epoch": 9.612269938650307, "grad_norm": 0.231317400932312, "learning_rate": 2.8218363519080167e-05, "loss": 0.6943895220756531, "step": 7834 }, { "epoch": 9.613496932515337, "grad_norm": 0.2345322072505951, "learning_rate": 2.8213386058187706e-05, "loss": 0.7399250268936157, "step": 7835 }, { "epoch": 9.614723926380368, "grad_norm": 0.24132105708122253, "learning_rate": 2.8208408467772818e-05, "loss": 0.6469358801841736, "step": 7836 }, { "epoch": 9.6159509202454, "grad_norm": 0.3026140034198761, "learning_rate": 2.820343074803614e-05, "loss": 0.5839694142341614, "step": 7837 }, { "epoch": 9.61717791411043, "grad_norm": 0.252550333738327, "learning_rate": 2.81984528991783e-05, "loss": 0.7600560784339905, "step": 7838 }, { "epoch": 9.61840490797546, "grad_norm": 0.2969111502170563, "learning_rate": 2.8193474921399952e-05, "loss": 0.6780409216880798, "step": 7839 }, { "epoch": 9.61963190184049, "grad_norm": 0.26558181643486023, "learning_rate": 2.8188496814901737e-05, "loss": 0.7189517021179199, "step": 7840 }, { "epoch": 9.62085889570552, "grad_norm": 0.2610943615436554, "learning_rate": 2.818351857988431e-05, "loss": 0.7207903861999512, "step": 7841 }, { "epoch": 9.622085889570553, "grad_norm": 0.23321427404880524, "learning_rate": 2.8178540216548327e-05, "loss": 0.475860595703125, "step": 7842 }, { "epoch": 9.623312883435583, "grad_norm": 0.29254814982414246, "learning_rate": 2.817356172509445e-05, "loss": 0.7436026930809021, "step": 7843 }, { "epoch": 9.624539877300613, "grad_norm": 0.2687574028968811, "learning_rate": 2.8168583105723356e-05, "loss": 0.7966195940971375, "step": 7844 }, { "epoch": 9.625766871165645, "grad_norm": 0.27607065439224243, "learning_rate": 2.816360435863571e-05, "loss": 0.9148505926132202, "step": 7845 }, { "epoch": 9.626993865030675, "grad_norm": 0.33157944679260254, "learning_rate": 2.8158625484032204e-05, "loss": 0.5775418281555176, "step": 7846 }, { "epoch": 9.628220858895705, "grad_norm": 0.2554692029953003, "learning_rate": 2.8153646482113505e-05, "loss": 0.5632274746894836, "step": 7847 }, { "epoch": 9.629447852760736, "grad_norm": 0.2238968014717102, "learning_rate": 2.814866735308031e-05, "loss": 0.8717141151428223, "step": 7848 }, { "epoch": 9.630674846625768, "grad_norm": 0.23049911856651306, "learning_rate": 2.814368809713332e-05, "loss": 0.7065593004226685, "step": 7849 }, { "epoch": 9.631901840490798, "grad_norm": 0.3246719539165497, "learning_rate": 2.8138708714473217e-05, "loss": 0.6331989765167236, "step": 7850 }, { "epoch": 9.633128834355828, "grad_norm": 0.2999447286128998, "learning_rate": 2.813372920530073e-05, "loss": 0.6876760721206665, "step": 7851 }, { "epoch": 9.634355828220858, "grad_norm": 0.30218371748924255, "learning_rate": 2.8128749569816554e-05, "loss": 0.38979336619377136, "step": 7852 }, { "epoch": 9.63558282208589, "grad_norm": 0.3090936839580536, "learning_rate": 2.8123769808221405e-05, "loss": 0.6538641452789307, "step": 7853 }, { "epoch": 9.63680981595092, "grad_norm": 0.2678055465221405, "learning_rate": 2.8118789920716004e-05, "loss": 0.6719075441360474, "step": 7854 }, { "epoch": 9.63803680981595, "grad_norm": 0.22902898490428925, "learning_rate": 2.8113809907501077e-05, "loss": 0.8560773730278015, "step": 7855 }, { "epoch": 9.639263803680981, "grad_norm": 0.2642802298069, "learning_rate": 2.8108829768777356e-05, "loss": 0.7667538523674011, "step": 7856 }, { "epoch": 9.640490797546013, "grad_norm": 0.2876010239124298, "learning_rate": 2.8103849504745565e-05, "loss": 0.6235823631286621, "step": 7857 }, { "epoch": 9.641717791411043, "grad_norm": 0.28406232595443726, "learning_rate": 2.809886911560647e-05, "loss": 0.5388154983520508, "step": 7858 }, { "epoch": 9.642944785276073, "grad_norm": 0.27346181869506836, "learning_rate": 2.809388860156078e-05, "loss": 0.5060227513313293, "step": 7859 }, { "epoch": 9.644171779141104, "grad_norm": 0.24182328581809998, "learning_rate": 2.8088907962809273e-05, "loss": 0.691925585269928, "step": 7860 }, { "epoch": 9.645398773006136, "grad_norm": 0.2274448722600937, "learning_rate": 2.8083927199552696e-05, "loss": 0.7461163997650146, "step": 7861 }, { "epoch": 9.646625766871166, "grad_norm": 0.2395409345626831, "learning_rate": 2.8078946311991805e-05, "loss": 0.780351996421814, "step": 7862 }, { "epoch": 9.647852760736196, "grad_norm": 0.22947636246681213, "learning_rate": 2.8073965300327377e-05, "loss": 0.5363095998764038, "step": 7863 }, { "epoch": 9.649079754601226, "grad_norm": 0.2548469305038452, "learning_rate": 2.806898416476017e-05, "loss": 0.7016866207122803, "step": 7864 }, { "epoch": 9.650306748466258, "grad_norm": 0.29243433475494385, "learning_rate": 2.8064002905490967e-05, "loss": 0.5330862998962402, "step": 7865 }, { "epoch": 9.651533742331289, "grad_norm": 0.3116329610347748, "learning_rate": 2.805902152272054e-05, "loss": 0.6551574468612671, "step": 7866 }, { "epoch": 9.652760736196319, "grad_norm": 0.255617618560791, "learning_rate": 2.8054040016649685e-05, "loss": 0.6667790412902832, "step": 7867 }, { "epoch": 9.653987730061349, "grad_norm": 0.23242151737213135, "learning_rate": 2.8049058387479193e-05, "loss": 0.6482268571853638, "step": 7868 }, { "epoch": 9.655214723926381, "grad_norm": 0.25104430317878723, "learning_rate": 2.8044076635409845e-05, "loss": 0.8133139610290527, "step": 7869 }, { "epoch": 9.656441717791411, "grad_norm": 0.24193979799747467, "learning_rate": 2.8039094760642455e-05, "loss": 0.6110372543334961, "step": 7870 }, { "epoch": 9.657668711656441, "grad_norm": 0.29400110244750977, "learning_rate": 2.8034112763377818e-05, "loss": 0.6864852905273438, "step": 7871 }, { "epoch": 9.658895705521472, "grad_norm": 0.2974591553211212, "learning_rate": 2.8029130643816747e-05, "loss": 0.7818368673324585, "step": 7872 }, { "epoch": 9.660122699386504, "grad_norm": 0.32497671246528625, "learning_rate": 2.8024148402160062e-05, "loss": 0.4464670419692993, "step": 7873 }, { "epoch": 9.661349693251534, "grad_norm": 0.24960818886756897, "learning_rate": 2.801916603860858e-05, "loss": 0.535497784614563, "step": 7874 }, { "epoch": 9.662576687116564, "grad_norm": 0.29224202036857605, "learning_rate": 2.801418355336313e-05, "loss": 0.5751489996910095, "step": 7875 }, { "epoch": 9.663803680981594, "grad_norm": 0.2694987952709198, "learning_rate": 2.8009200946624536e-05, "loss": 0.7463476657867432, "step": 7876 }, { "epoch": 9.665030674846626, "grad_norm": 0.2878726124763489, "learning_rate": 2.8004218218593635e-05, "loss": 0.937816858291626, "step": 7877 }, { "epoch": 9.666257668711657, "grad_norm": 0.2818160951137543, "learning_rate": 2.7999235369471262e-05, "loss": 0.7705961465835571, "step": 7878 }, { "epoch": 9.667484662576687, "grad_norm": 0.24333244562149048, "learning_rate": 2.7994252399458264e-05, "loss": 0.7328936457633972, "step": 7879 }, { "epoch": 9.668711656441717, "grad_norm": 0.24470849335193634, "learning_rate": 2.79892693087555e-05, "loss": 0.9858540892601013, "step": 7880 }, { "epoch": 9.669938650306749, "grad_norm": 0.3457293212413788, "learning_rate": 2.798428609756381e-05, "loss": 0.4598901569843292, "step": 7881 }, { "epoch": 9.67116564417178, "grad_norm": 0.28193801641464233, "learning_rate": 2.7979302766084065e-05, "loss": 0.684999942779541, "step": 7882 }, { "epoch": 9.67239263803681, "grad_norm": 0.2797239124774933, "learning_rate": 2.7974319314517106e-05, "loss": 0.7967406511306763, "step": 7883 }, { "epoch": 9.67361963190184, "grad_norm": 0.27752578258514404, "learning_rate": 2.7969335743063836e-05, "loss": 0.7917127013206482, "step": 7884 }, { "epoch": 9.674846625766872, "grad_norm": 0.28346675634384155, "learning_rate": 2.7964352051925103e-05, "loss": 0.6863394379615784, "step": 7885 }, { "epoch": 9.676073619631902, "grad_norm": 0.2980201244354248, "learning_rate": 2.7959368241301792e-05, "loss": 0.6729153394699097, "step": 7886 }, { "epoch": 9.677300613496932, "grad_norm": 0.2644399106502533, "learning_rate": 2.7954384311394794e-05, "loss": 0.506024956703186, "step": 7887 }, { "epoch": 9.678527607361962, "grad_norm": 0.3385447859764099, "learning_rate": 2.7949400262404983e-05, "loss": 0.5982370376586914, "step": 7888 }, { "epoch": 9.679754601226994, "grad_norm": 0.2886083424091339, "learning_rate": 2.7944416094533265e-05, "loss": 0.7520314455032349, "step": 7889 }, { "epoch": 9.680981595092025, "grad_norm": 0.2541733980178833, "learning_rate": 2.793943180798053e-05, "loss": 0.6262608170509338, "step": 7890 }, { "epoch": 9.682208588957055, "grad_norm": 0.2979324460029602, "learning_rate": 2.7934447402947685e-05, "loss": 0.737931489944458, "step": 7891 }, { "epoch": 9.683435582822085, "grad_norm": 0.2760447859764099, "learning_rate": 2.792946287963564e-05, "loss": 0.8339124917984009, "step": 7892 }, { "epoch": 9.684662576687117, "grad_norm": 0.23512227833271027, "learning_rate": 2.792447823824529e-05, "loss": 0.7421925663948059, "step": 7893 }, { "epoch": 9.685889570552147, "grad_norm": 0.3050349950790405, "learning_rate": 2.7919493478977565e-05, "loss": 0.7144189476966858, "step": 7894 }, { "epoch": 9.687116564417177, "grad_norm": 0.2884618639945984, "learning_rate": 2.7914508602033384e-05, "loss": 0.5584830045700073, "step": 7895 }, { "epoch": 9.68834355828221, "grad_norm": 0.31655406951904297, "learning_rate": 2.7909523607613674e-05, "loss": 0.6487463712692261, "step": 7896 }, { "epoch": 9.68957055214724, "grad_norm": 0.3320387601852417, "learning_rate": 2.7904538495919374e-05, "loss": 0.516782820224762, "step": 7897 }, { "epoch": 9.69079754601227, "grad_norm": 0.31260746717453003, "learning_rate": 2.78995532671514e-05, "loss": 0.6536757946014404, "step": 7898 }, { "epoch": 9.6920245398773, "grad_norm": 0.4830842614173889, "learning_rate": 2.7894567921510717e-05, "loss": 0.5831769704818726, "step": 7899 }, { "epoch": 9.69325153374233, "grad_norm": 0.2999313175678253, "learning_rate": 2.7889582459198244e-05, "loss": 0.7625604867935181, "step": 7900 }, { "epoch": 9.694478527607362, "grad_norm": 0.2685636579990387, "learning_rate": 2.788459688041495e-05, "loss": 0.8980787992477417, "step": 7901 }, { "epoch": 9.695705521472393, "grad_norm": 0.2242560088634491, "learning_rate": 2.787961118536178e-05, "loss": 0.7027502059936523, "step": 7902 }, { "epoch": 9.696932515337423, "grad_norm": 0.21995939314365387, "learning_rate": 2.7874625374239692e-05, "loss": 0.6568849086761475, "step": 7903 }, { "epoch": 9.698159509202455, "grad_norm": 0.32596349716186523, "learning_rate": 2.786963944724966e-05, "loss": 0.6829007863998413, "step": 7904 }, { "epoch": 9.699386503067485, "grad_norm": 0.25586777925491333, "learning_rate": 2.7864653404592645e-05, "loss": 0.7329151034355164, "step": 7905 }, { "epoch": 9.700613496932515, "grad_norm": 0.3693770170211792, "learning_rate": 2.7859667246469623e-05, "loss": 0.4008105397224426, "step": 7906 }, { "epoch": 9.701840490797546, "grad_norm": 0.2555698752403259, "learning_rate": 2.7854680973081565e-05, "loss": 0.6887948513031006, "step": 7907 }, { "epoch": 9.703067484662578, "grad_norm": 0.28275465965270996, "learning_rate": 2.784969458462946e-05, "loss": 0.5437367558479309, "step": 7908 }, { "epoch": 9.704294478527608, "grad_norm": 0.24403436481952667, "learning_rate": 2.7844708081314302e-05, "loss": 0.7268730998039246, "step": 7909 }, { "epoch": 9.705521472392638, "grad_norm": 0.33090007305145264, "learning_rate": 2.7839721463337066e-05, "loss": 0.39757204055786133, "step": 7910 }, { "epoch": 9.706748466257668, "grad_norm": 0.25446975231170654, "learning_rate": 2.7834734730898766e-05, "loss": 0.6319745779037476, "step": 7911 }, { "epoch": 9.7079754601227, "grad_norm": 0.3004908561706543, "learning_rate": 2.7829747884200384e-05, "loss": 0.7462000846862793, "step": 7912 }, { "epoch": 9.70920245398773, "grad_norm": 0.21225151419639587, "learning_rate": 2.7824760923442938e-05, "loss": 0.721692681312561, "step": 7913 }, { "epoch": 9.71042944785276, "grad_norm": 0.26651132106781006, "learning_rate": 2.781977384882744e-05, "loss": 0.667744517326355, "step": 7914 }, { "epoch": 9.71165644171779, "grad_norm": 0.28750747442245483, "learning_rate": 2.78147866605549e-05, "loss": 0.5543158054351807, "step": 7915 }, { "epoch": 9.712883435582823, "grad_norm": 0.24769815802574158, "learning_rate": 2.7809799358826328e-05, "loss": 0.7491638660430908, "step": 7916 }, { "epoch": 9.714110429447853, "grad_norm": 0.2393924444913864, "learning_rate": 2.7804811943842767e-05, "loss": 0.6491119861602783, "step": 7917 }, { "epoch": 9.715337423312883, "grad_norm": 0.28523218631744385, "learning_rate": 2.7799824415805238e-05, "loss": 0.6267105937004089, "step": 7918 }, { "epoch": 9.716564417177914, "grad_norm": 0.28353744745254517, "learning_rate": 2.7794836774914762e-05, "loss": 0.6463713645935059, "step": 7919 }, { "epoch": 9.717791411042946, "grad_norm": 0.2818962037563324, "learning_rate": 2.7789849021372393e-05, "loss": 0.7006783485412598, "step": 7920 }, { "epoch": 9.719018404907976, "grad_norm": 0.26238787174224854, "learning_rate": 2.778486115537917e-05, "loss": 0.6685671210289001, "step": 7921 }, { "epoch": 9.720245398773006, "grad_norm": 0.27369219064712524, "learning_rate": 2.777987317713613e-05, "loss": 0.44159233570098877, "step": 7922 }, { "epoch": 9.721472392638036, "grad_norm": 0.2604092061519623, "learning_rate": 2.7774885086844336e-05, "loss": 0.6954475045204163, "step": 7923 }, { "epoch": 9.722699386503068, "grad_norm": 0.26983731985092163, "learning_rate": 2.7769896884704838e-05, "loss": 0.5541260242462158, "step": 7924 }, { "epoch": 9.723926380368098, "grad_norm": 0.29985037446022034, "learning_rate": 2.776490857091869e-05, "loss": 0.6535333395004272, "step": 7925 }, { "epoch": 9.725153374233129, "grad_norm": 0.22853673994541168, "learning_rate": 2.7759920145686973e-05, "loss": 0.7614259123802185, "step": 7926 }, { "epoch": 9.726380368098159, "grad_norm": 0.30268019437789917, "learning_rate": 2.7754931609210733e-05, "loss": 0.4600343704223633, "step": 7927 }, { "epoch": 9.72760736196319, "grad_norm": 0.25017285346984863, "learning_rate": 2.7749942961691073e-05, "loss": 0.9218512773513794, "step": 7928 }, { "epoch": 9.728834355828221, "grad_norm": 0.2163238376379013, "learning_rate": 2.7744954203329043e-05, "loss": 0.6387820243835449, "step": 7929 }, { "epoch": 9.730061349693251, "grad_norm": 0.28385138511657715, "learning_rate": 2.7739965334325747e-05, "loss": 0.38987672328948975, "step": 7930 }, { "epoch": 9.731288343558282, "grad_norm": 0.31162360310554504, "learning_rate": 2.7734976354882257e-05, "loss": 0.5308504104614258, "step": 7931 }, { "epoch": 9.732515337423314, "grad_norm": 0.3190193772315979, "learning_rate": 2.7729987265199668e-05, "loss": 0.8966612815856934, "step": 7932 }, { "epoch": 9.733742331288344, "grad_norm": 0.3515007793903351, "learning_rate": 2.772499806547909e-05, "loss": 0.514600932598114, "step": 7933 }, { "epoch": 9.734969325153374, "grad_norm": 0.2771768867969513, "learning_rate": 2.7720008755921602e-05, "loss": 0.7137249708175659, "step": 7934 }, { "epoch": 9.736196319018404, "grad_norm": 0.303320050239563, "learning_rate": 2.7715019336728326e-05, "loss": 0.721876859664917, "step": 7935 }, { "epoch": 9.737423312883436, "grad_norm": 0.24151888489723206, "learning_rate": 2.771002980810036e-05, "loss": 0.7674354314804077, "step": 7936 }, { "epoch": 9.738650306748466, "grad_norm": 0.3119039237499237, "learning_rate": 2.7705040170238816e-05, "loss": 0.6461061239242554, "step": 7937 }, { "epoch": 9.739877300613497, "grad_norm": 0.29155072569847107, "learning_rate": 2.770005042334482e-05, "loss": 0.6875118017196655, "step": 7938 }, { "epoch": 9.741104294478527, "grad_norm": 0.24097073078155518, "learning_rate": 2.7695060567619498e-05, "loss": 0.607750654220581, "step": 7939 }, { "epoch": 9.742331288343559, "grad_norm": 0.3014213740825653, "learning_rate": 2.769007060326397e-05, "loss": 0.502075731754303, "step": 7940 }, { "epoch": 9.743558282208589, "grad_norm": 0.22235007584095, "learning_rate": 2.7685080530479363e-05, "loss": 0.6495522260665894, "step": 7941 }, { "epoch": 9.74478527607362, "grad_norm": 0.2736169397830963, "learning_rate": 2.768009034946682e-05, "loss": 0.608445942401886, "step": 7942 }, { "epoch": 9.74601226993865, "grad_norm": 0.3990400731563568, "learning_rate": 2.7675100060427485e-05, "loss": 0.6062763929367065, "step": 7943 }, { "epoch": 9.747239263803682, "grad_norm": 0.29225194454193115, "learning_rate": 2.767010966356249e-05, "loss": 0.7494187951087952, "step": 7944 }, { "epoch": 9.748466257668712, "grad_norm": 0.2514438331127167, "learning_rate": 2.7665119159072993e-05, "loss": 0.7397971153259277, "step": 7945 }, { "epoch": 9.749693251533742, "grad_norm": 0.2977950870990753, "learning_rate": 2.7660128547160143e-05, "loss": 0.7287536859512329, "step": 7946 }, { "epoch": 9.750920245398772, "grad_norm": 0.3146217167377472, "learning_rate": 2.7655137828025098e-05, "loss": 0.7143101692199707, "step": 7947 }, { "epoch": 9.752147239263804, "grad_norm": 0.29225394129753113, "learning_rate": 2.765014700186902e-05, "loss": 0.7715603113174438, "step": 7948 }, { "epoch": 9.753374233128834, "grad_norm": 0.23429951071739197, "learning_rate": 2.7645156068893073e-05, "loss": 0.7789784073829651, "step": 7949 }, { "epoch": 9.754601226993865, "grad_norm": 0.3219245672225952, "learning_rate": 2.7640165029298427e-05, "loss": 0.5284386873245239, "step": 7950 }, { "epoch": 9.755828220858895, "grad_norm": 0.260010302066803, "learning_rate": 2.7635173883286258e-05, "loss": 0.6853065490722656, "step": 7951 }, { "epoch": 9.757055214723927, "grad_norm": 0.26131343841552734, "learning_rate": 2.7630182631057756e-05, "loss": 0.6081886291503906, "step": 7952 }, { "epoch": 9.758282208588957, "grad_norm": 0.2890598177909851, "learning_rate": 2.7625191272814082e-05, "loss": 0.5969188213348389, "step": 7953 }, { "epoch": 9.759509202453987, "grad_norm": 0.26351219415664673, "learning_rate": 2.762019980875644e-05, "loss": 0.6617691516876221, "step": 7954 }, { "epoch": 9.76073619631902, "grad_norm": 0.3282061815261841, "learning_rate": 2.761520823908602e-05, "loss": 0.7665660977363586, "step": 7955 }, { "epoch": 9.76196319018405, "grad_norm": 0.26108360290527344, "learning_rate": 2.761021656400401e-05, "loss": 0.8318825364112854, "step": 7956 }, { "epoch": 9.76319018404908, "grad_norm": 0.28547754883766174, "learning_rate": 2.760522478371162e-05, "loss": 0.6794348955154419, "step": 7957 }, { "epoch": 9.76441717791411, "grad_norm": 0.23761026561260223, "learning_rate": 2.7600232898410044e-05, "loss": 0.7624766230583191, "step": 7958 }, { "epoch": 9.76564417177914, "grad_norm": 0.27408134937286377, "learning_rate": 2.7595240908300496e-05, "loss": 0.47812944650650024, "step": 7959 }, { "epoch": 9.766871165644172, "grad_norm": 0.38276076316833496, "learning_rate": 2.7590248813584196e-05, "loss": 0.4549049735069275, "step": 7960 }, { "epoch": 9.768098159509202, "grad_norm": 0.26040056347846985, "learning_rate": 2.758525661446234e-05, "loss": 0.7290812134742737, "step": 7961 }, { "epoch": 9.769325153374233, "grad_norm": 0.2853853106498718, "learning_rate": 2.7580264311136173e-05, "loss": 0.6503535509109497, "step": 7962 }, { "epoch": 9.770552147239265, "grad_norm": 0.31743958592414856, "learning_rate": 2.757527190380691e-05, "loss": 0.5984208583831787, "step": 7963 }, { "epoch": 9.771779141104295, "grad_norm": 0.3542799949645996, "learning_rate": 2.7570279392675786e-05, "loss": 0.6948585510253906, "step": 7964 }, { "epoch": 9.773006134969325, "grad_norm": 0.2558603584766388, "learning_rate": 2.756528677794402e-05, "loss": 0.797347903251648, "step": 7965 }, { "epoch": 9.774233128834355, "grad_norm": 0.32669901847839355, "learning_rate": 2.7560294059812864e-05, "loss": 0.5832453966140747, "step": 7966 }, { "epoch": 9.775460122699386, "grad_norm": 0.2877366542816162, "learning_rate": 2.7555301238483566e-05, "loss": 0.7520029544830322, "step": 7967 }, { "epoch": 9.776687116564418, "grad_norm": 0.3173764944076538, "learning_rate": 2.7550308314157352e-05, "loss": 0.401397705078125, "step": 7968 }, { "epoch": 9.777914110429448, "grad_norm": 0.1948188990354538, "learning_rate": 2.7545315287035488e-05, "loss": 0.5598530769348145, "step": 7969 }, { "epoch": 9.779141104294478, "grad_norm": 0.29132595658302307, "learning_rate": 2.754032215731922e-05, "loss": 0.5636059045791626, "step": 7970 }, { "epoch": 9.78036809815951, "grad_norm": 0.27817773818969727, "learning_rate": 2.7535328925209813e-05, "loss": 0.7613797783851624, "step": 7971 }, { "epoch": 9.78159509202454, "grad_norm": 0.35646358132362366, "learning_rate": 2.753033559090853e-05, "loss": 0.7013722658157349, "step": 7972 }, { "epoch": 9.78282208588957, "grad_norm": 0.24285781383514404, "learning_rate": 2.7525342154616636e-05, "loss": 0.8079888820648193, "step": 7973 }, { "epoch": 9.7840490797546, "grad_norm": 0.27686429023742676, "learning_rate": 2.7520348616535404e-05, "loss": 0.5429234504699707, "step": 7974 }, { "epoch": 9.785276073619633, "grad_norm": 0.28013718128204346, "learning_rate": 2.7515354976866105e-05, "loss": 0.5253531336784363, "step": 7975 }, { "epoch": 9.786503067484663, "grad_norm": 0.2894231677055359, "learning_rate": 2.7510361235810022e-05, "loss": 0.8423405885696411, "step": 7976 }, { "epoch": 9.787730061349693, "grad_norm": 0.3147273659706116, "learning_rate": 2.7505367393568434e-05, "loss": 0.7697036266326904, "step": 7977 }, { "epoch": 9.788957055214723, "grad_norm": 0.3012937605381012, "learning_rate": 2.7500373450342632e-05, "loss": 0.7089878916740417, "step": 7978 }, { "epoch": 9.790184049079755, "grad_norm": 0.23640084266662598, "learning_rate": 2.7495379406333916e-05, "loss": 0.5551496744155884, "step": 7979 }, { "epoch": 9.791411042944786, "grad_norm": 0.25448983907699585, "learning_rate": 2.749038526174356e-05, "loss": 0.6824632883071899, "step": 7980 }, { "epoch": 9.792638036809816, "grad_norm": 0.2560564875602722, "learning_rate": 2.7485391016772887e-05, "loss": 0.5897003412246704, "step": 7981 }, { "epoch": 9.793865030674846, "grad_norm": 0.37101465463638306, "learning_rate": 2.748039667162319e-05, "loss": 0.739283561706543, "step": 7982 }, { "epoch": 9.795092024539878, "grad_norm": 0.35979175567626953, "learning_rate": 2.7475402226495773e-05, "loss": 0.484218955039978, "step": 7983 }, { "epoch": 9.796319018404908, "grad_norm": 0.26813048124313354, "learning_rate": 2.7470407681591952e-05, "loss": 0.6219862699508667, "step": 7984 }, { "epoch": 9.797546012269938, "grad_norm": 0.2564813792705536, "learning_rate": 2.7465413037113042e-05, "loss": 0.6283615827560425, "step": 7985 }, { "epoch": 9.798773006134969, "grad_norm": 0.24242755770683289, "learning_rate": 2.7460418293260372e-05, "loss": 0.7731413841247559, "step": 7986 }, { "epoch": 9.8, "grad_norm": 0.33553680777549744, "learning_rate": 2.745542345023525e-05, "loss": 0.5324397087097168, "step": 7987 }, { "epoch": 9.801226993865031, "grad_norm": 0.23983846604824066, "learning_rate": 2.7450428508239024e-05, "loss": 0.6128418445587158, "step": 7988 }, { "epoch": 9.802453987730061, "grad_norm": 0.338713675737381, "learning_rate": 2.7445433467472998e-05, "loss": 0.6053846478462219, "step": 7989 }, { "epoch": 9.803680981595091, "grad_norm": 0.3022187352180481, "learning_rate": 2.744043832813853e-05, "loss": 0.5062423944473267, "step": 7990 }, { "epoch": 9.804907975460123, "grad_norm": 0.26106956601142883, "learning_rate": 2.7435443090436958e-05, "loss": 0.8176952600479126, "step": 7991 }, { "epoch": 9.806134969325154, "grad_norm": 0.22971494495868683, "learning_rate": 2.7430447754569617e-05, "loss": 0.755622148513794, "step": 7992 }, { "epoch": 9.807361963190184, "grad_norm": 0.30434200167655945, "learning_rate": 2.7425452320737854e-05, "loss": 0.6750363111495972, "step": 7993 }, { "epoch": 9.808588957055214, "grad_norm": 0.3559776246547699, "learning_rate": 2.742045678914303e-05, "loss": 0.4196922481060028, "step": 7994 }, { "epoch": 9.809815950920246, "grad_norm": 0.26875776052474976, "learning_rate": 2.7415461159986494e-05, "loss": 0.555622398853302, "step": 7995 }, { "epoch": 9.811042944785276, "grad_norm": 0.2512287497520447, "learning_rate": 2.741046543346961e-05, "loss": 0.8696588277816772, "step": 7996 }, { "epoch": 9.812269938650306, "grad_norm": 0.3394024968147278, "learning_rate": 2.7405469609793743e-05, "loss": 0.40318572521209717, "step": 7997 }, { "epoch": 9.813496932515337, "grad_norm": 0.3160177767276764, "learning_rate": 2.7400473689160255e-05, "loss": 0.6525273323059082, "step": 7998 }, { "epoch": 9.814723926380369, "grad_norm": 0.2651534080505371, "learning_rate": 2.7395477671770513e-05, "loss": 0.6738871335983276, "step": 7999 }, { "epoch": 9.815950920245399, "grad_norm": 0.2997594177722931, "learning_rate": 2.7390481557825903e-05, "loss": 0.7280482053756714, "step": 8000 }, { "epoch": 9.81717791411043, "grad_norm": 0.2558542788028717, "learning_rate": 2.73854853475278e-05, "loss": 0.5120368003845215, "step": 8001 }, { "epoch": 9.81840490797546, "grad_norm": 0.2585538625717163, "learning_rate": 2.7380489041077584e-05, "loss": 0.7158650159835815, "step": 8002 }, { "epoch": 9.819631901840491, "grad_norm": 0.3140195608139038, "learning_rate": 2.7375492638676646e-05, "loss": 0.7210165858268738, "step": 8003 }, { "epoch": 9.820858895705522, "grad_norm": 0.26622286438941956, "learning_rate": 2.7370496140526375e-05, "loss": 0.7362502813339233, "step": 8004 }, { "epoch": 9.822085889570552, "grad_norm": 0.28574398159980774, "learning_rate": 2.7365499546828165e-05, "loss": 0.8295000791549683, "step": 8005 }, { "epoch": 9.823312883435582, "grad_norm": 0.3750555217266083, "learning_rate": 2.736050285778341e-05, "loss": 0.44060105085372925, "step": 8006 }, { "epoch": 9.824539877300614, "grad_norm": 0.29673081636428833, "learning_rate": 2.7355506073593518e-05, "loss": 0.6763076186180115, "step": 8007 }, { "epoch": 9.825766871165644, "grad_norm": 0.27314478158950806, "learning_rate": 2.7350509194459906e-05, "loss": 0.8346712589263916, "step": 8008 }, { "epoch": 9.826993865030675, "grad_norm": 0.33192527294158936, "learning_rate": 2.7345512220583958e-05, "loss": 0.5648165941238403, "step": 8009 }, { "epoch": 9.828220858895705, "grad_norm": 0.30922675132751465, "learning_rate": 2.7340515152167112e-05, "loss": 0.4897666573524475, "step": 8010 }, { "epoch": 9.829447852760737, "grad_norm": 0.2574467658996582, "learning_rate": 2.733551798941077e-05, "loss": 0.6980960369110107, "step": 8011 }, { "epoch": 9.830674846625767, "grad_norm": 0.3294823169708252, "learning_rate": 2.7330520732516356e-05, "loss": 0.40299537777900696, "step": 8012 }, { "epoch": 9.831901840490797, "grad_norm": 0.2567453682422638, "learning_rate": 2.732552338168531e-05, "loss": 0.5354708433151245, "step": 8013 }, { "epoch": 9.833128834355827, "grad_norm": 0.24456724524497986, "learning_rate": 2.732052593711904e-05, "loss": 0.5339258909225464, "step": 8014 }, { "epoch": 9.83435582822086, "grad_norm": 0.25129273533821106, "learning_rate": 2.7315528399018993e-05, "loss": 0.678227961063385, "step": 8015 }, { "epoch": 9.83558282208589, "grad_norm": 0.27429264783859253, "learning_rate": 2.731053076758659e-05, "loss": 0.7792361974716187, "step": 8016 }, { "epoch": 9.83680981595092, "grad_norm": 0.34991610050201416, "learning_rate": 2.7305533043023292e-05, "loss": 0.38486313819885254, "step": 8017 }, { "epoch": 9.83803680981595, "grad_norm": 0.2770864963531494, "learning_rate": 2.7300535225530528e-05, "loss": 0.7682287096977234, "step": 8018 }, { "epoch": 9.839263803680982, "grad_norm": 0.24017851054668427, "learning_rate": 2.7295537315309744e-05, "loss": 0.708411693572998, "step": 8019 }, { "epoch": 9.840490797546012, "grad_norm": 0.29600679874420166, "learning_rate": 2.7290539312562414e-05, "loss": 0.2003120630979538, "step": 8020 }, { "epoch": 9.841717791411043, "grad_norm": 0.31713274121284485, "learning_rate": 2.7285541217489963e-05, "loss": 0.6662341356277466, "step": 8021 }, { "epoch": 9.842944785276075, "grad_norm": 0.24569475650787354, "learning_rate": 2.7280543030293874e-05, "loss": 0.6004542112350464, "step": 8022 }, { "epoch": 9.844171779141105, "grad_norm": 0.24407720565795898, "learning_rate": 2.727554475117559e-05, "loss": 0.7231024503707886, "step": 8023 }, { "epoch": 9.845398773006135, "grad_norm": 0.38023197650909424, "learning_rate": 2.7270546380336592e-05, "loss": 0.6670159101486206, "step": 8024 }, { "epoch": 9.846625766871165, "grad_norm": 0.2921162247657776, "learning_rate": 2.726554791797835e-05, "loss": 0.6937084197998047, "step": 8025 }, { "epoch": 9.847852760736195, "grad_norm": 0.28432661294937134, "learning_rate": 2.726054936430233e-05, "loss": 0.6250059008598328, "step": 8026 }, { "epoch": 9.849079754601227, "grad_norm": 0.28152215480804443, "learning_rate": 2.7255550719510003e-05, "loss": 0.684067964553833, "step": 8027 }, { "epoch": 9.850306748466258, "grad_norm": 0.2589647173881531, "learning_rate": 2.7250551983802868e-05, "loss": 0.6165387630462646, "step": 8028 }, { "epoch": 9.851533742331288, "grad_norm": 0.31123030185699463, "learning_rate": 2.72455531573824e-05, "loss": 0.5938689708709717, "step": 8029 }, { "epoch": 9.85276073619632, "grad_norm": 0.24143104255199432, "learning_rate": 2.7240554240450094e-05, "loss": 0.7581138014793396, "step": 8030 }, { "epoch": 9.85398773006135, "grad_norm": 0.2430049628019333, "learning_rate": 2.7235555233207426e-05, "loss": 0.5327028632164001, "step": 8031 }, { "epoch": 9.85521472392638, "grad_norm": 0.23522090911865234, "learning_rate": 2.7230556135855915e-05, "loss": 0.7654615044593811, "step": 8032 }, { "epoch": 9.85644171779141, "grad_norm": 0.30355536937713623, "learning_rate": 2.722555694859704e-05, "loss": 0.7392933368682861, "step": 8033 }, { "epoch": 9.857668711656443, "grad_norm": 0.2682637870311737, "learning_rate": 2.722055767163232e-05, "loss": 0.6165003776550293, "step": 8034 }, { "epoch": 9.858895705521473, "grad_norm": 0.3080289661884308, "learning_rate": 2.7215558305163243e-05, "loss": 0.8276926279067993, "step": 8035 }, { "epoch": 9.860122699386503, "grad_norm": 0.23682186007499695, "learning_rate": 2.7210558849391333e-05, "loss": 0.6600531339645386, "step": 8036 }, { "epoch": 9.861349693251533, "grad_norm": 0.21681155264377594, "learning_rate": 2.7205559304518103e-05, "loss": 0.6741970777511597, "step": 8037 }, { "epoch": 9.862576687116565, "grad_norm": 0.2783581018447876, "learning_rate": 2.720055967074507e-05, "loss": 0.6079009175300598, "step": 8038 }, { "epoch": 9.863803680981595, "grad_norm": 0.22673070430755615, "learning_rate": 2.7195559948273743e-05, "loss": 0.6227555274963379, "step": 8039 }, { "epoch": 9.865030674846626, "grad_norm": 0.3293776512145996, "learning_rate": 2.719056013730566e-05, "loss": 0.5645435452461243, "step": 8040 }, { "epoch": 9.866257668711656, "grad_norm": 0.26012563705444336, "learning_rate": 2.7185560238042347e-05, "loss": 0.7331506013870239, "step": 8041 }, { "epoch": 9.867484662576688, "grad_norm": 0.24552816152572632, "learning_rate": 2.718056025068534e-05, "loss": 0.7031726837158203, "step": 8042 }, { "epoch": 9.868711656441718, "grad_norm": 0.2608523368835449, "learning_rate": 2.717556017543616e-05, "loss": 0.99192214012146, "step": 8043 }, { "epoch": 9.869938650306748, "grad_norm": 0.3163054883480072, "learning_rate": 2.717056001249636e-05, "loss": 0.573244571685791, "step": 8044 }, { "epoch": 9.871165644171779, "grad_norm": 0.2814811170101166, "learning_rate": 2.7165559762067476e-05, "loss": 0.6721121072769165, "step": 8045 }, { "epoch": 9.87239263803681, "grad_norm": 0.29182979464530945, "learning_rate": 2.7160559424351052e-05, "loss": 0.5977108478546143, "step": 8046 }, { "epoch": 9.87361963190184, "grad_norm": 0.31616419553756714, "learning_rate": 2.7155558999548642e-05, "loss": 0.6356081962585449, "step": 8047 }, { "epoch": 9.874846625766871, "grad_norm": 0.27869540452957153, "learning_rate": 2.7150558487861795e-05, "loss": 0.4578474164009094, "step": 8048 }, { "epoch": 9.876073619631901, "grad_norm": 0.25289982557296753, "learning_rate": 2.7145557889492075e-05, "loss": 0.5053662061691284, "step": 8049 }, { "epoch": 9.877300613496933, "grad_norm": 0.28768086433410645, "learning_rate": 2.714055720464102e-05, "loss": 0.7027246952056885, "step": 8050 }, { "epoch": 9.878527607361963, "grad_norm": 0.4163755476474762, "learning_rate": 2.7135556433510224e-05, "loss": 0.6062684059143066, "step": 8051 }, { "epoch": 9.879754601226994, "grad_norm": 0.32232749462127686, "learning_rate": 2.7130555576301232e-05, "loss": 0.5258243083953857, "step": 8052 }, { "epoch": 9.880981595092024, "grad_norm": 0.2400919497013092, "learning_rate": 2.7125554633215615e-05, "loss": 0.7871496677398682, "step": 8053 }, { "epoch": 9.882208588957056, "grad_norm": 0.2652643918991089, "learning_rate": 2.7120553604454967e-05, "loss": 0.6438117623329163, "step": 8054 }, { "epoch": 9.883435582822086, "grad_norm": 0.2515730857849121, "learning_rate": 2.7115552490220847e-05, "loss": 0.6361733675003052, "step": 8055 }, { "epoch": 9.884662576687116, "grad_norm": 0.2536270320415497, "learning_rate": 2.711055129071484e-05, "loss": 0.7643002271652222, "step": 8056 }, { "epoch": 9.885889570552147, "grad_norm": 0.24269706010818481, "learning_rate": 2.7105550006138526e-05, "loss": 0.7456027865409851, "step": 8057 }, { "epoch": 9.887116564417179, "grad_norm": 0.2775897979736328, "learning_rate": 2.7100548636693494e-05, "loss": 0.6818116307258606, "step": 8058 }, { "epoch": 9.888343558282209, "grad_norm": 0.26259592175483704, "learning_rate": 2.709554718258135e-05, "loss": 0.5290268063545227, "step": 8059 }, { "epoch": 9.889570552147239, "grad_norm": 0.30445992946624756, "learning_rate": 2.7090545644003667e-05, "loss": 0.6210326552391052, "step": 8060 }, { "epoch": 9.89079754601227, "grad_norm": 0.2570197284221649, "learning_rate": 2.7085544021162045e-05, "loss": 0.541696310043335, "step": 8061 }, { "epoch": 9.892024539877301, "grad_norm": 0.2937662899494171, "learning_rate": 2.70805423142581e-05, "loss": 0.6546508073806763, "step": 8062 }, { "epoch": 9.893251533742331, "grad_norm": 0.25029024481773376, "learning_rate": 2.707554052349343e-05, "loss": 0.7717514038085938, "step": 8063 }, { "epoch": 9.894478527607362, "grad_norm": 0.2058262974023819, "learning_rate": 2.7070538649069636e-05, "loss": 0.6467387676239014, "step": 8064 }, { "epoch": 9.895705521472392, "grad_norm": 0.23903633654117584, "learning_rate": 2.7065536691188337e-05, "loss": 0.7963968515396118, "step": 8065 }, { "epoch": 9.896932515337424, "grad_norm": 0.31209486722946167, "learning_rate": 2.7060534650051152e-05, "loss": 0.4808531403541565, "step": 8066 }, { "epoch": 9.898159509202454, "grad_norm": 0.2730177640914917, "learning_rate": 2.705553252585968e-05, "loss": 0.695218563079834, "step": 8067 }, { "epoch": 9.899386503067484, "grad_norm": 0.28873759508132935, "learning_rate": 2.7050530318815565e-05, "loss": 0.7543017864227295, "step": 8068 }, { "epoch": 9.900613496932515, "grad_norm": 0.26651352643966675, "learning_rate": 2.704552802912041e-05, "loss": 0.594813346862793, "step": 8069 }, { "epoch": 9.901840490797547, "grad_norm": 0.3095269501209259, "learning_rate": 2.7040525656975855e-05, "loss": 0.8013652563095093, "step": 8070 }, { "epoch": 9.903067484662577, "grad_norm": 0.2803416848182678, "learning_rate": 2.7035523202583547e-05, "loss": 0.6105815172195435, "step": 8071 }, { "epoch": 9.904294478527607, "grad_norm": 0.2871115505695343, "learning_rate": 2.7030520666145083e-05, "loss": 0.6519815921783447, "step": 8072 }, { "epoch": 9.905521472392637, "grad_norm": 0.25994017720222473, "learning_rate": 2.7025518047862135e-05, "loss": 0.7683377861976624, "step": 8073 }, { "epoch": 9.90674846625767, "grad_norm": 0.3156927227973938, "learning_rate": 2.7020515347936324e-05, "loss": 0.5945304036140442, "step": 8074 }, { "epoch": 9.9079754601227, "grad_norm": 0.2481977492570877, "learning_rate": 2.7015512566569312e-05, "loss": 0.6927897930145264, "step": 8075 }, { "epoch": 9.90920245398773, "grad_norm": 0.24588562548160553, "learning_rate": 2.7010509703962726e-05, "loss": 0.5127456784248352, "step": 8076 }, { "epoch": 9.91042944785276, "grad_norm": 0.3299514949321747, "learning_rate": 2.7005506760318235e-05, "loss": 0.5679043531417847, "step": 8077 }, { "epoch": 9.911656441717792, "grad_norm": 0.2627362608909607, "learning_rate": 2.7000503735837486e-05, "loss": 0.826970100402832, "step": 8078 }, { "epoch": 9.912883435582822, "grad_norm": 0.233347550034523, "learning_rate": 2.699550063072213e-05, "loss": 0.6801852583885193, "step": 8079 }, { "epoch": 9.914110429447852, "grad_norm": 0.3093266487121582, "learning_rate": 2.6990497445173846e-05, "loss": 0.6206302642822266, "step": 8080 }, { "epoch": 9.915337423312884, "grad_norm": 0.2679794430732727, "learning_rate": 2.698549417939428e-05, "loss": 0.6346476078033447, "step": 8081 }, { "epoch": 9.916564417177915, "grad_norm": 0.2963297367095947, "learning_rate": 2.698049083358511e-05, "loss": 0.590569257736206, "step": 8082 }, { "epoch": 9.917791411042945, "grad_norm": 0.2740139067173004, "learning_rate": 2.6975487407948e-05, "loss": 0.8601871728897095, "step": 8083 }, { "epoch": 9.919018404907975, "grad_norm": 0.23198182880878448, "learning_rate": 2.6970483902684623e-05, "loss": 0.6188704967498779, "step": 8084 }, { "epoch": 9.920245398773005, "grad_norm": 0.2497221827507019, "learning_rate": 2.6965480317996666e-05, "loss": 0.7081469893455505, "step": 8085 }, { "epoch": 9.921472392638037, "grad_norm": 0.21048013865947723, "learning_rate": 2.69604766540858e-05, "loss": 0.6317428350448608, "step": 8086 }, { "epoch": 9.922699386503067, "grad_norm": 0.27951303124427795, "learning_rate": 2.6955472911153717e-05, "loss": 0.7626842260360718, "step": 8087 }, { "epoch": 9.923926380368098, "grad_norm": 0.3216420114040375, "learning_rate": 2.6950469089402092e-05, "loss": 0.4556618332862854, "step": 8088 }, { "epoch": 9.92515337423313, "grad_norm": 0.3398437798023224, "learning_rate": 2.694546518903262e-05, "loss": 0.6197619438171387, "step": 8089 }, { "epoch": 9.92638036809816, "grad_norm": 0.3018592894077301, "learning_rate": 2.6940461210247002e-05, "loss": 0.7909551858901978, "step": 8090 }, { "epoch": 9.92760736196319, "grad_norm": 0.26869428157806396, "learning_rate": 2.6935457153246924e-05, "loss": 0.7130799889564514, "step": 8091 }, { "epoch": 9.92883435582822, "grad_norm": 0.27087727189064026, "learning_rate": 2.6930453018234086e-05, "loss": 0.7323068380355835, "step": 8092 }, { "epoch": 9.93006134969325, "grad_norm": 0.311834454536438, "learning_rate": 2.6925448805410198e-05, "loss": 0.5815169811248779, "step": 8093 }, { "epoch": 9.931288343558283, "grad_norm": 0.34831494092941284, "learning_rate": 2.6920444514976944e-05, "loss": 0.5727505683898926, "step": 8094 }, { "epoch": 9.932515337423313, "grad_norm": 0.2944394648075104, "learning_rate": 2.6915440147136066e-05, "loss": 0.7064962387084961, "step": 8095 }, { "epoch": 9.933742331288343, "grad_norm": 0.24046720564365387, "learning_rate": 2.6910435702089254e-05, "loss": 0.7806229591369629, "step": 8096 }, { "epoch": 9.934969325153375, "grad_norm": 0.26106470823287964, "learning_rate": 2.690543118003823e-05, "loss": 0.5437203645706177, "step": 8097 }, { "epoch": 9.936196319018405, "grad_norm": 0.2624424993991852, "learning_rate": 2.6900426581184706e-05, "loss": 0.5857323408126831, "step": 8098 }, { "epoch": 9.937423312883435, "grad_norm": 0.32358747720718384, "learning_rate": 2.6895421905730407e-05, "loss": 0.714434027671814, "step": 8099 }, { "epoch": 9.938650306748466, "grad_norm": 0.2957558035850525, "learning_rate": 2.6890417153877056e-05, "loss": 0.726502001285553, "step": 8100 }, { "epoch": 9.939877300613498, "grad_norm": 0.24490660429000854, "learning_rate": 2.6885412325826386e-05, "loss": 0.6068605184555054, "step": 8101 }, { "epoch": 9.941104294478528, "grad_norm": 0.25018638372421265, "learning_rate": 2.6880407421780117e-05, "loss": 0.7087689638137817, "step": 8102 }, { "epoch": 9.942331288343558, "grad_norm": 0.27156925201416016, "learning_rate": 2.6875402441939988e-05, "loss": 0.6475590467453003, "step": 8103 }, { "epoch": 9.943558282208588, "grad_norm": 0.2620546519756317, "learning_rate": 2.687039738650774e-05, "loss": 0.7738996148109436, "step": 8104 }, { "epoch": 9.94478527607362, "grad_norm": 0.3262263238430023, "learning_rate": 2.68653922556851e-05, "loss": 0.43114081025123596, "step": 8105 }, { "epoch": 9.94601226993865, "grad_norm": 0.5067592859268188, "learning_rate": 2.6860387049673823e-05, "loss": 0.5511378049850464, "step": 8106 }, { "epoch": 9.94723926380368, "grad_norm": 0.25778448581695557, "learning_rate": 2.685538176867566e-05, "loss": 0.8218804001808167, "step": 8107 }, { "epoch": 9.948466257668711, "grad_norm": 0.2346697449684143, "learning_rate": 2.6850376412892337e-05, "loss": 0.6582692861557007, "step": 8108 }, { "epoch": 9.949693251533743, "grad_norm": 0.2629135549068451, "learning_rate": 2.6845370982525625e-05, "loss": 0.7554268836975098, "step": 8109 }, { "epoch": 9.950920245398773, "grad_norm": 0.21511605381965637, "learning_rate": 2.6840365477777264e-05, "loss": 0.746806263923645, "step": 8110 }, { "epoch": 9.952147239263804, "grad_norm": 0.30373451113700867, "learning_rate": 2.683535989884902e-05, "loss": 0.750883162021637, "step": 8111 }, { "epoch": 9.953374233128834, "grad_norm": 0.34038054943084717, "learning_rate": 2.683035424594266e-05, "loss": 0.32259172201156616, "step": 8112 }, { "epoch": 9.954601226993866, "grad_norm": 0.24143140017986298, "learning_rate": 2.682534851925994e-05, "loss": 0.632678747177124, "step": 8113 }, { "epoch": 9.955828220858896, "grad_norm": 0.2873026728630066, "learning_rate": 2.682034271900263e-05, "loss": 0.6279386878013611, "step": 8114 }, { "epoch": 9.957055214723926, "grad_norm": 0.30673131346702576, "learning_rate": 2.681533684537249e-05, "loss": 0.5110592246055603, "step": 8115 }, { "epoch": 9.958282208588956, "grad_norm": 0.26495081186294556, "learning_rate": 2.6810330898571296e-05, "loss": 0.5196846723556519, "step": 8116 }, { "epoch": 9.959509202453988, "grad_norm": 0.3212563693523407, "learning_rate": 2.680532487880083e-05, "loss": 0.6762124300003052, "step": 8117 }, { "epoch": 9.960736196319019, "grad_norm": 0.33278441429138184, "learning_rate": 2.6800318786262867e-05, "loss": 0.4983416199684143, "step": 8118 }, { "epoch": 9.961963190184049, "grad_norm": 0.2817368805408478, "learning_rate": 2.6795312621159198e-05, "loss": 0.7905446290969849, "step": 8119 }, { "epoch": 9.963190184049079, "grad_norm": 0.20376142859458923, "learning_rate": 2.679030638369159e-05, "loss": 0.7298320531845093, "step": 8120 }, { "epoch": 9.964417177914111, "grad_norm": 0.2866024374961853, "learning_rate": 2.6785300074061836e-05, "loss": 0.6438112854957581, "step": 8121 }, { "epoch": 9.965644171779141, "grad_norm": 0.28484731912612915, "learning_rate": 2.678029369247173e-05, "loss": 0.6652641296386719, "step": 8122 }, { "epoch": 9.966871165644172, "grad_norm": 0.3091638684272766, "learning_rate": 2.6775287239123058e-05, "loss": 0.6506946086883545, "step": 8123 }, { "epoch": 9.968098159509202, "grad_norm": 0.39537110924720764, "learning_rate": 2.677028071421763e-05, "loss": 0.3153056502342224, "step": 8124 }, { "epoch": 9.969325153374234, "grad_norm": 0.2261544018983841, "learning_rate": 2.6765274117957227e-05, "loss": 0.8194409608840942, "step": 8125 }, { "epoch": 9.970552147239264, "grad_norm": 0.31345316767692566, "learning_rate": 2.676026745054367e-05, "loss": 0.5966938734054565, "step": 8126 }, { "epoch": 9.971779141104294, "grad_norm": 0.21626751124858856, "learning_rate": 2.6755260712178738e-05, "loss": 0.7558445930480957, "step": 8127 }, { "epoch": 9.973006134969324, "grad_norm": 0.22448530793190002, "learning_rate": 2.675025390306426e-05, "loss": 0.709533154964447, "step": 8128 }, { "epoch": 9.974233128834356, "grad_norm": 0.27227112650871277, "learning_rate": 2.6745247023402038e-05, "loss": 0.6919693946838379, "step": 8129 }, { "epoch": 9.975460122699387, "grad_norm": 0.4143243432044983, "learning_rate": 2.6740240073393885e-05, "loss": 0.6538602113723755, "step": 8130 }, { "epoch": 9.976687116564417, "grad_norm": 0.2751287519931793, "learning_rate": 2.673523305324162e-05, "loss": 0.5896996855735779, "step": 8131 }, { "epoch": 9.977914110429447, "grad_norm": 0.2710951268672943, "learning_rate": 2.6730225963147058e-05, "loss": 0.6030243635177612, "step": 8132 }, { "epoch": 9.979141104294479, "grad_norm": 0.3015625476837158, "learning_rate": 2.672521880331203e-05, "loss": 0.6209772825241089, "step": 8133 }, { "epoch": 9.98036809815951, "grad_norm": 0.24781513214111328, "learning_rate": 2.6720211573938346e-05, "loss": 0.8579310774803162, "step": 8134 }, { "epoch": 9.98159509202454, "grad_norm": 0.3107212781906128, "learning_rate": 2.6715204275227834e-05, "loss": 0.567180871963501, "step": 8135 }, { "epoch": 9.98282208588957, "grad_norm": 0.3010776937007904, "learning_rate": 2.671019690738234e-05, "loss": 0.48135045170783997, "step": 8136 }, { "epoch": 9.984049079754602, "grad_norm": 0.30992433428764343, "learning_rate": 2.6705189470603682e-05, "loss": 0.4325256645679474, "step": 8137 }, { "epoch": 9.985276073619632, "grad_norm": 0.2700008451938629, "learning_rate": 2.67001819650937e-05, "loss": 0.5891233682632446, "step": 8138 }, { "epoch": 9.986503067484662, "grad_norm": 0.3573036193847656, "learning_rate": 2.6695174391054228e-05, "loss": 0.6869769096374512, "step": 8139 }, { "epoch": 9.987730061349692, "grad_norm": 0.28511863946914673, "learning_rate": 2.6690166748687117e-05, "loss": 0.7046219110488892, "step": 8140 }, { "epoch": 9.988957055214724, "grad_norm": 0.27288007736206055, "learning_rate": 2.66851590381942e-05, "loss": 0.8419010639190674, "step": 8141 }, { "epoch": 9.990184049079755, "grad_norm": 0.25473567843437195, "learning_rate": 2.668015125977733e-05, "loss": 0.5482197999954224, "step": 8142 }, { "epoch": 9.991411042944785, "grad_norm": 0.32456761598587036, "learning_rate": 2.667514341363836e-05, "loss": 0.5297106504440308, "step": 8143 }, { "epoch": 9.992638036809815, "grad_norm": 0.27756962180137634, "learning_rate": 2.6670135499979134e-05, "loss": 0.6414309740066528, "step": 8144 }, { "epoch": 9.993865030674847, "grad_norm": 0.22336366772651672, "learning_rate": 2.6665127519001502e-05, "loss": 0.7774495482444763, "step": 8145 }, { "epoch": 9.995092024539877, "grad_norm": 0.32304075360298157, "learning_rate": 2.666011947090734e-05, "loss": 0.6247328519821167, "step": 8146 }, { "epoch": 9.996319018404908, "grad_norm": 0.30878081917762756, "learning_rate": 2.665511135589849e-05, "loss": 0.46900880336761475, "step": 8147 }, { "epoch": 9.99754601226994, "grad_norm": 0.26870566606521606, "learning_rate": 2.6650103174176827e-05, "loss": 0.6986773014068604, "step": 8148 }, { "epoch": 9.99877300613497, "grad_norm": 0.27917176485061646, "learning_rate": 2.6645094925944204e-05, "loss": 0.5061341524124146, "step": 8149 }, { "epoch": 10.0, "grad_norm": 0.2889094650745392, "learning_rate": 2.6640086611402497e-05, "loss": 0.7988486289978027, "step": 8150 }, { "epoch": 10.00122699386503, "grad_norm": 0.32266461849212646, "learning_rate": 2.663507823075358e-05, "loss": 0.5872830152511597, "step": 8151 }, { "epoch": 10.002453987730062, "grad_norm": 0.24604710936546326, "learning_rate": 2.6630069784199318e-05, "loss": 0.7490848302841187, "step": 8152 }, { "epoch": 10.003680981595092, "grad_norm": 0.2602784037590027, "learning_rate": 2.6625061271941598e-05, "loss": 0.48097604513168335, "step": 8153 }, { "epoch": 10.004907975460123, "grad_norm": 0.2269633263349533, "learning_rate": 2.662005269418229e-05, "loss": 0.5921735763549805, "step": 8154 }, { "epoch": 10.006134969325153, "grad_norm": 0.25675952434539795, "learning_rate": 2.6615044051123282e-05, "loss": 0.601032018661499, "step": 8155 }, { "epoch": 10.007361963190185, "grad_norm": 0.26681047677993774, "learning_rate": 2.6610035342966456e-05, "loss": 0.6448390483856201, "step": 8156 }, { "epoch": 10.008588957055215, "grad_norm": 0.25963518023490906, "learning_rate": 2.660502656991369e-05, "loss": 0.7211657166481018, "step": 8157 }, { "epoch": 10.009815950920245, "grad_norm": 0.3023524582386017, "learning_rate": 2.6600017732166886e-05, "loss": 0.5741034746170044, "step": 8158 }, { "epoch": 10.011042944785276, "grad_norm": 0.2323361486196518, "learning_rate": 2.6595008829927924e-05, "loss": 0.8036826252937317, "step": 8159 }, { "epoch": 10.012269938650308, "grad_norm": 0.24532122910022736, "learning_rate": 2.6589999863398717e-05, "loss": 0.6160972118377686, "step": 8160 }, { "epoch": 10.013496932515338, "grad_norm": 0.27387118339538574, "learning_rate": 2.6584990832781137e-05, "loss": 0.6815520524978638, "step": 8161 }, { "epoch": 10.014723926380368, "grad_norm": 0.2313029170036316, "learning_rate": 2.6579981738277103e-05, "loss": 0.6015870571136475, "step": 8162 }, { "epoch": 10.015950920245398, "grad_norm": 0.28824174404144287, "learning_rate": 2.6574972580088514e-05, "loss": 0.6348482370376587, "step": 8163 }, { "epoch": 10.01717791411043, "grad_norm": 0.258834570646286, "learning_rate": 2.6569963358417265e-05, "loss": 0.5551198720932007, "step": 8164 }, { "epoch": 10.01840490797546, "grad_norm": 0.20529980957508087, "learning_rate": 2.656495407346528e-05, "loss": 0.8308024406433105, "step": 8165 }, { "epoch": 10.01963190184049, "grad_norm": 0.2650367021560669, "learning_rate": 2.6559944725434456e-05, "loss": 0.69676673412323, "step": 8166 }, { "epoch": 10.020858895705521, "grad_norm": 0.2617354989051819, "learning_rate": 2.6554935314526713e-05, "loss": 0.6410548686981201, "step": 8167 }, { "epoch": 10.022085889570553, "grad_norm": 0.26275113224983215, "learning_rate": 2.6549925840943956e-05, "loss": 0.7520206570625305, "step": 8168 }, { "epoch": 10.023312883435583, "grad_norm": 0.29487931728363037, "learning_rate": 2.6544916304888108e-05, "loss": 0.5755007863044739, "step": 8169 }, { "epoch": 10.024539877300613, "grad_norm": 0.26420915126800537, "learning_rate": 2.6539906706561098e-05, "loss": 0.750726044178009, "step": 8170 }, { "epoch": 10.025766871165644, "grad_norm": 0.2551104426383972, "learning_rate": 2.653489704616483e-05, "loss": 0.7620382308959961, "step": 8171 }, { "epoch": 10.026993865030676, "grad_norm": 0.2863752543926239, "learning_rate": 2.652988732390124e-05, "loss": 0.5167300701141357, "step": 8172 }, { "epoch": 10.028220858895706, "grad_norm": 0.28677964210510254, "learning_rate": 2.6524877539972263e-05, "loss": 0.4546663761138916, "step": 8173 }, { "epoch": 10.029447852760736, "grad_norm": 0.2784143090248108, "learning_rate": 2.6519867694579824e-05, "loss": 0.5969674587249756, "step": 8174 }, { "epoch": 10.030674846625766, "grad_norm": 0.30431172251701355, "learning_rate": 2.6514857787925845e-05, "loss": 0.6427561044692993, "step": 8175 }, { "epoch": 10.031901840490798, "grad_norm": 0.21155095100402832, "learning_rate": 2.650984782021227e-05, "loss": 0.5338947772979736, "step": 8176 }, { "epoch": 10.033128834355828, "grad_norm": 0.31026026606559753, "learning_rate": 2.650483779164104e-05, "loss": 0.5292293429374695, "step": 8177 }, { "epoch": 10.034355828220859, "grad_norm": 0.29033568501472473, "learning_rate": 2.6499827702414086e-05, "loss": 0.6154739856719971, "step": 8178 }, { "epoch": 10.035582822085889, "grad_norm": 0.2382689118385315, "learning_rate": 2.6494817552733365e-05, "loss": 0.5085824131965637, "step": 8179 }, { "epoch": 10.036809815950921, "grad_norm": 0.27574771642684937, "learning_rate": 2.6489807342800797e-05, "loss": 0.43597331643104553, "step": 8180 }, { "epoch": 10.038036809815951, "grad_norm": 0.30231818556785583, "learning_rate": 2.648479707281835e-05, "loss": 0.3379189074039459, "step": 8181 }, { "epoch": 10.039263803680981, "grad_norm": 0.32006019353866577, "learning_rate": 2.6479786742987972e-05, "loss": 0.546899139881134, "step": 8182 }, { "epoch": 10.040490797546012, "grad_norm": 0.2613171935081482, "learning_rate": 2.6474776353511603e-05, "loss": 0.6889445781707764, "step": 8183 }, { "epoch": 10.041717791411044, "grad_norm": 0.2924613356590271, "learning_rate": 2.6469765904591214e-05, "loss": 0.5851524472236633, "step": 8184 }, { "epoch": 10.042944785276074, "grad_norm": 0.2935611605644226, "learning_rate": 2.6464755396428748e-05, "loss": 0.747705340385437, "step": 8185 }, { "epoch": 10.044171779141104, "grad_norm": 0.2602192461490631, "learning_rate": 2.6459744829226174e-05, "loss": 0.5694552063941956, "step": 8186 }, { "epoch": 10.045398773006134, "grad_norm": 0.2798595428466797, "learning_rate": 2.6454734203185442e-05, "loss": 0.603718101978302, "step": 8187 }, { "epoch": 10.046625766871166, "grad_norm": 0.311987966299057, "learning_rate": 2.644972351850853e-05, "loss": 0.6350143551826477, "step": 8188 }, { "epoch": 10.047852760736196, "grad_norm": 0.2500719428062439, "learning_rate": 2.6444712775397396e-05, "loss": 0.6440575122833252, "step": 8189 }, { "epoch": 10.049079754601227, "grad_norm": 0.24390922486782074, "learning_rate": 2.643970197405401e-05, "loss": 0.7215626835823059, "step": 8190 }, { "epoch": 10.050306748466257, "grad_norm": 0.2484075427055359, "learning_rate": 2.6434691114680345e-05, "loss": 0.6830735206604004, "step": 8191 }, { "epoch": 10.051533742331289, "grad_norm": 0.2588702142238617, "learning_rate": 2.6429680197478367e-05, "loss": 0.5805211067199707, "step": 8192 }, { "epoch": 10.05276073619632, "grad_norm": 0.28157827258110046, "learning_rate": 2.642466922265006e-05, "loss": 0.650860071182251, "step": 8193 }, { "epoch": 10.05398773006135, "grad_norm": 0.22354674339294434, "learning_rate": 2.64196581903974e-05, "loss": 0.7608189582824707, "step": 8194 }, { "epoch": 10.05521472392638, "grad_norm": 0.27126577496528625, "learning_rate": 2.6414647100922368e-05, "loss": 0.563025712966919, "step": 8195 }, { "epoch": 10.056441717791412, "grad_norm": 0.2416396141052246, "learning_rate": 2.6409635954426955e-05, "loss": 0.6940189599990845, "step": 8196 }, { "epoch": 10.057668711656442, "grad_norm": 0.2473011165857315, "learning_rate": 2.6404624751113127e-05, "loss": 0.6424667239189148, "step": 8197 }, { "epoch": 10.058895705521472, "grad_norm": 0.23688068985939026, "learning_rate": 2.639961349118288e-05, "loss": 0.5071818232536316, "step": 8198 }, { "epoch": 10.060122699386502, "grad_norm": 0.22651197016239166, "learning_rate": 2.6394602174838214e-05, "loss": 0.568153977394104, "step": 8199 }, { "epoch": 10.061349693251534, "grad_norm": 0.26783695816993713, "learning_rate": 2.6389590802281105e-05, "loss": 0.5556658506393433, "step": 8200 }, { "epoch": 10.062576687116565, "grad_norm": 0.2583819031715393, "learning_rate": 2.638457937371356e-05, "loss": 0.7183079719543457, "step": 8201 }, { "epoch": 10.063803680981595, "grad_norm": 0.23410563170909882, "learning_rate": 2.6379567889337558e-05, "loss": 0.7247555255889893, "step": 8202 }, { "epoch": 10.065030674846625, "grad_norm": 0.2548772394657135, "learning_rate": 2.6374556349355117e-05, "loss": 0.6277580857276917, "step": 8203 }, { "epoch": 10.066257668711657, "grad_norm": 0.2856414020061493, "learning_rate": 2.6369544753968234e-05, "loss": 0.7274060249328613, "step": 8204 }, { "epoch": 10.067484662576687, "grad_norm": 0.24348299205303192, "learning_rate": 2.6364533103378896e-05, "loss": 0.938323974609375, "step": 8205 }, { "epoch": 10.068711656441717, "grad_norm": 0.2527957856655121, "learning_rate": 2.6359521397789127e-05, "loss": 0.7024022936820984, "step": 8206 }, { "epoch": 10.069938650306748, "grad_norm": 0.26514551043510437, "learning_rate": 2.635450963740093e-05, "loss": 0.7975714206695557, "step": 8207 }, { "epoch": 10.07116564417178, "grad_norm": 0.27980390191078186, "learning_rate": 2.6349497822416314e-05, "loss": 0.6441330909729004, "step": 8208 }, { "epoch": 10.07239263803681, "grad_norm": 0.2820267677307129, "learning_rate": 2.6344485953037285e-05, "loss": 0.6528328657150269, "step": 8209 }, { "epoch": 10.07361963190184, "grad_norm": 0.2768419086933136, "learning_rate": 2.633947402946586e-05, "loss": 0.6661956906318665, "step": 8210 }, { "epoch": 10.07484662576687, "grad_norm": 0.28398898243904114, "learning_rate": 2.6334462051904068e-05, "loss": 0.6928024291992188, "step": 8211 }, { "epoch": 10.076073619631902, "grad_norm": 0.23067161440849304, "learning_rate": 2.6329450020553907e-05, "loss": 0.7776541709899902, "step": 8212 }, { "epoch": 10.077300613496933, "grad_norm": 0.2628990113735199, "learning_rate": 2.6324437935617414e-05, "loss": 0.7593581080436707, "step": 8213 }, { "epoch": 10.078527607361963, "grad_norm": 0.30428868532180786, "learning_rate": 2.6319425797296598e-05, "loss": 0.6177962422370911, "step": 8214 }, { "epoch": 10.079754601226995, "grad_norm": 0.21940244734287262, "learning_rate": 2.631441360579349e-05, "loss": 0.638182520866394, "step": 8215 }, { "epoch": 10.080981595092025, "grad_norm": 0.2533566653728485, "learning_rate": 2.6309401361310122e-05, "loss": 0.6878241896629333, "step": 8216 }, { "epoch": 10.082208588957055, "grad_norm": 0.24091513454914093, "learning_rate": 2.6304389064048517e-05, "loss": 0.8181180953979492, "step": 8217 }, { "epoch": 10.083435582822085, "grad_norm": 0.2770196795463562, "learning_rate": 2.6299376714210716e-05, "loss": 0.6315176486968994, "step": 8218 }, { "epoch": 10.084662576687117, "grad_norm": 0.246595099568367, "learning_rate": 2.6294364311998743e-05, "loss": 0.8413705825805664, "step": 8219 }, { "epoch": 10.085889570552148, "grad_norm": 0.2858022153377533, "learning_rate": 2.6289351857614637e-05, "loss": 0.6513274312019348, "step": 8220 }, { "epoch": 10.087116564417178, "grad_norm": 0.23806728422641754, "learning_rate": 2.6284339351260435e-05, "loss": 0.6792060136795044, "step": 8221 }, { "epoch": 10.088343558282208, "grad_norm": 0.2731674313545227, "learning_rate": 2.627932679313817e-05, "loss": 0.734133243560791, "step": 8222 }, { "epoch": 10.08957055214724, "grad_norm": 0.2690226137638092, "learning_rate": 2.62743141834499e-05, "loss": 0.5331560969352722, "step": 8223 }, { "epoch": 10.09079754601227, "grad_norm": 0.38629627227783203, "learning_rate": 2.6269301522397655e-05, "loss": 0.4911906123161316, "step": 8224 }, { "epoch": 10.0920245398773, "grad_norm": 0.2048715054988861, "learning_rate": 2.626428881018349e-05, "loss": 0.739513099193573, "step": 8225 }, { "epoch": 10.09325153374233, "grad_norm": 0.2224857658147812, "learning_rate": 2.625927604700944e-05, "loss": 0.6689599752426147, "step": 8226 }, { "epoch": 10.094478527607363, "grad_norm": 0.30791762471199036, "learning_rate": 2.625426323307757e-05, "loss": 0.6605879664421082, "step": 8227 }, { "epoch": 10.095705521472393, "grad_norm": 0.31656476855278015, "learning_rate": 2.6249250368589924e-05, "loss": 0.5234866142272949, "step": 8228 }, { "epoch": 10.096932515337423, "grad_norm": 0.3007611334323883, "learning_rate": 2.6244237453748553e-05, "loss": 0.562179684638977, "step": 8229 }, { "epoch": 10.098159509202453, "grad_norm": 0.3346426784992218, "learning_rate": 2.623922448875553e-05, "loss": 0.5256490111351013, "step": 8230 }, { "epoch": 10.099386503067485, "grad_norm": 0.26379162073135376, "learning_rate": 2.6234211473812904e-05, "loss": 0.6294642686843872, "step": 8231 }, { "epoch": 10.100613496932516, "grad_norm": 0.24040915071964264, "learning_rate": 2.6229198409122728e-05, "loss": 0.7084238529205322, "step": 8232 }, { "epoch": 10.101840490797546, "grad_norm": 0.29377520084381104, "learning_rate": 2.6224185294887064e-05, "loss": 0.390656441450119, "step": 8233 }, { "epoch": 10.103067484662576, "grad_norm": 0.28585055470466614, "learning_rate": 2.621917213130799e-05, "loss": 0.4986931085586548, "step": 8234 }, { "epoch": 10.104294478527608, "grad_norm": 0.28902682662010193, "learning_rate": 2.6214158918587566e-05, "loss": 0.6360486745834351, "step": 8235 }, { "epoch": 10.105521472392638, "grad_norm": 0.2701789438724518, "learning_rate": 2.6209145656927853e-05, "loss": 0.5940130352973938, "step": 8236 }, { "epoch": 10.106748466257669, "grad_norm": 0.2691538333892822, "learning_rate": 2.6204132346530936e-05, "loss": 0.740478515625, "step": 8237 }, { "epoch": 10.107975460122699, "grad_norm": 0.2676914632320404, "learning_rate": 2.619911898759887e-05, "loss": 0.5558108687400818, "step": 8238 }, { "epoch": 10.10920245398773, "grad_norm": 0.2787107825279236, "learning_rate": 2.619410558033374e-05, "loss": 0.8057708144187927, "step": 8239 }, { "epoch": 10.110429447852761, "grad_norm": 0.22985966503620148, "learning_rate": 2.6189092124937615e-05, "loss": 0.7198972702026367, "step": 8240 }, { "epoch": 10.111656441717791, "grad_norm": 0.2534818947315216, "learning_rate": 2.618407862161258e-05, "loss": 0.6989873647689819, "step": 8241 }, { "epoch": 10.112883435582821, "grad_norm": 0.28821244835853577, "learning_rate": 2.617906507056072e-05, "loss": 0.6623212099075317, "step": 8242 }, { "epoch": 10.114110429447853, "grad_norm": 0.23749154806137085, "learning_rate": 2.6174051471984102e-05, "loss": 0.8368679881095886, "step": 8243 }, { "epoch": 10.115337423312884, "grad_norm": 0.28835421800613403, "learning_rate": 2.6169037826084824e-05, "loss": 0.5148819088935852, "step": 8244 }, { "epoch": 10.116564417177914, "grad_norm": 0.2829461991786957, "learning_rate": 2.6164024133064964e-05, "loss": 0.42012691497802734, "step": 8245 }, { "epoch": 10.117791411042944, "grad_norm": 0.251137375831604, "learning_rate": 2.6159010393126604e-05, "loss": 0.8548045754432678, "step": 8246 }, { "epoch": 10.119018404907976, "grad_norm": 0.3302762806415558, "learning_rate": 2.615399660647185e-05, "loss": 0.650842547416687, "step": 8247 }, { "epoch": 10.120245398773006, "grad_norm": 0.29319366812705994, "learning_rate": 2.614898277330278e-05, "loss": 0.6537247896194458, "step": 8248 }, { "epoch": 10.121472392638037, "grad_norm": 0.27653345465660095, "learning_rate": 2.6143968893821487e-05, "loss": 0.7506837844848633, "step": 8249 }, { "epoch": 10.122699386503067, "grad_norm": 0.28134629130363464, "learning_rate": 2.6138954968230073e-05, "loss": 0.4678904414176941, "step": 8250 }, { "epoch": 10.123926380368099, "grad_norm": 0.2936533987522125, "learning_rate": 2.6133940996730632e-05, "loss": 0.6226942539215088, "step": 8251 }, { "epoch": 10.125153374233129, "grad_norm": 0.2509269416332245, "learning_rate": 2.612892697952527e-05, "loss": 0.7913237810134888, "step": 8252 }, { "epoch": 10.12638036809816, "grad_norm": 0.3002605736255646, "learning_rate": 2.6123912916816074e-05, "loss": 0.907334566116333, "step": 8253 }, { "epoch": 10.12760736196319, "grad_norm": 0.24489977955818176, "learning_rate": 2.6118898808805164e-05, "loss": 0.7841325998306274, "step": 8254 }, { "epoch": 10.128834355828221, "grad_norm": 0.351239413022995, "learning_rate": 2.6113884655694625e-05, "loss": 0.5812082290649414, "step": 8255 }, { "epoch": 10.130061349693252, "grad_norm": 0.2738291919231415, "learning_rate": 2.6108870457686573e-05, "loss": 0.6085813045501709, "step": 8256 }, { "epoch": 10.131288343558282, "grad_norm": 0.23842540383338928, "learning_rate": 2.610385621498312e-05, "loss": 0.7787251472473145, "step": 8257 }, { "epoch": 10.132515337423312, "grad_norm": 0.2652827501296997, "learning_rate": 2.6098841927786365e-05, "loss": 0.6421768069267273, "step": 8258 }, { "epoch": 10.133742331288344, "grad_norm": 0.2509157657623291, "learning_rate": 2.6093827596298436e-05, "loss": 0.6831662654876709, "step": 8259 }, { "epoch": 10.134969325153374, "grad_norm": 0.2511208653450012, "learning_rate": 2.6088813220721427e-05, "loss": 0.6115875840187073, "step": 8260 }, { "epoch": 10.136196319018405, "grad_norm": 0.27761393785476685, "learning_rate": 2.6083798801257464e-05, "loss": 0.6429685354232788, "step": 8261 }, { "epoch": 10.137423312883435, "grad_norm": 0.29134008288383484, "learning_rate": 2.607878433810866e-05, "loss": 0.44919002056121826, "step": 8262 }, { "epoch": 10.138650306748467, "grad_norm": 0.25705137848854065, "learning_rate": 2.607376983147714e-05, "loss": 0.6249508261680603, "step": 8263 }, { "epoch": 10.139877300613497, "grad_norm": 0.20985442399978638, "learning_rate": 2.606875528156502e-05, "loss": 0.6939468383789062, "step": 8264 }, { "epoch": 10.141104294478527, "grad_norm": 0.25290295481681824, "learning_rate": 2.6063740688574423e-05, "loss": 0.6061895489692688, "step": 8265 }, { "epoch": 10.142331288343557, "grad_norm": 0.2666078507900238, "learning_rate": 2.605872605270748e-05, "loss": 0.5845192670822144, "step": 8266 }, { "epoch": 10.14355828220859, "grad_norm": 0.250969797372818, "learning_rate": 2.6053711374166296e-05, "loss": 0.7942438125610352, "step": 8267 }, { "epoch": 10.14478527607362, "grad_norm": 0.25019603967666626, "learning_rate": 2.6048696653153014e-05, "loss": 0.7835149765014648, "step": 8268 }, { "epoch": 10.14601226993865, "grad_norm": 0.27948811650276184, "learning_rate": 2.604368188986977e-05, "loss": 0.5126911401748657, "step": 8269 }, { "epoch": 10.14723926380368, "grad_norm": 0.28168320655822754, "learning_rate": 2.603866708451868e-05, "loss": 0.7967839241027832, "step": 8270 }, { "epoch": 10.148466257668712, "grad_norm": 0.30265700817108154, "learning_rate": 2.6033652237301887e-05, "loss": 0.7132116556167603, "step": 8271 }, { "epoch": 10.149693251533742, "grad_norm": 0.2595286965370178, "learning_rate": 2.602863734842151e-05, "loss": 0.6086565256118774, "step": 8272 }, { "epoch": 10.150920245398773, "grad_norm": 0.31670624017715454, "learning_rate": 2.60236224180797e-05, "loss": 0.4937927722930908, "step": 8273 }, { "epoch": 10.152147239263805, "grad_norm": 0.28263944387435913, "learning_rate": 2.6018607446478593e-05, "loss": 0.6915067434310913, "step": 8274 }, { "epoch": 10.153374233128835, "grad_norm": 0.2987024486064911, "learning_rate": 2.601359243382032e-05, "loss": 0.6623202562332153, "step": 8275 }, { "epoch": 10.154601226993865, "grad_norm": 0.35910043120384216, "learning_rate": 2.6008577380307037e-05, "loss": 0.38211125135421753, "step": 8276 }, { "epoch": 10.155828220858895, "grad_norm": 0.2507875859737396, "learning_rate": 2.6003562286140864e-05, "loss": 0.7392926812171936, "step": 8277 }, { "epoch": 10.157055214723927, "grad_norm": 0.25697118043899536, "learning_rate": 2.5998547151523966e-05, "loss": 0.8018653392791748, "step": 8278 }, { "epoch": 10.158282208588957, "grad_norm": 0.35853028297424316, "learning_rate": 2.5993531976658474e-05, "loss": 0.3692238926887512, "step": 8279 }, { "epoch": 10.159509202453988, "grad_norm": 0.26058706641197205, "learning_rate": 2.5988516761746546e-05, "loss": 0.7191330194473267, "step": 8280 }, { "epoch": 10.160736196319018, "grad_norm": 0.2690278887748718, "learning_rate": 2.5983501506990326e-05, "loss": 0.7009729146957397, "step": 8281 }, { "epoch": 10.16196319018405, "grad_norm": 0.2628236711025238, "learning_rate": 2.597848621259197e-05, "loss": 0.6911848783493042, "step": 8282 }, { "epoch": 10.16319018404908, "grad_norm": 0.26874810457229614, "learning_rate": 2.5973470878753615e-05, "loss": 0.6665494441986084, "step": 8283 }, { "epoch": 10.16441717791411, "grad_norm": 0.21247056126594543, "learning_rate": 2.596845550567743e-05, "loss": 0.7644167542457581, "step": 8284 }, { "epoch": 10.16564417177914, "grad_norm": 0.2522313892841339, "learning_rate": 2.5963440093565566e-05, "loss": 0.5939326286315918, "step": 8285 }, { "epoch": 10.166871165644173, "grad_norm": 0.26354461908340454, "learning_rate": 2.595842464262018e-05, "loss": 0.7556982040405273, "step": 8286 }, { "epoch": 10.168098159509203, "grad_norm": 0.25094565749168396, "learning_rate": 2.595340915304343e-05, "loss": 0.587921142578125, "step": 8287 }, { "epoch": 10.169325153374233, "grad_norm": 0.2804667353630066, "learning_rate": 2.5948393625037475e-05, "loss": 0.7133681774139404, "step": 8288 }, { "epoch": 10.170552147239263, "grad_norm": 0.2898728847503662, "learning_rate": 2.594337805880448e-05, "loss": 0.44420507550239563, "step": 8289 }, { "epoch": 10.171779141104295, "grad_norm": 0.312505304813385, "learning_rate": 2.5938362454546612e-05, "loss": 0.553981363773346, "step": 8290 }, { "epoch": 10.173006134969325, "grad_norm": 0.2691432535648346, "learning_rate": 2.593334681246602e-05, "loss": 0.7890554070472717, "step": 8291 }, { "epoch": 10.174233128834356, "grad_norm": 0.2546508312225342, "learning_rate": 2.5928331132764878e-05, "loss": 0.6890823841094971, "step": 8292 }, { "epoch": 10.175460122699386, "grad_norm": 0.23546458780765533, "learning_rate": 2.5923315415645366e-05, "loss": 0.7802841663360596, "step": 8293 }, { "epoch": 10.176687116564418, "grad_norm": 0.2342694252729416, "learning_rate": 2.591829966130963e-05, "loss": 0.685541033744812, "step": 8294 }, { "epoch": 10.177914110429448, "grad_norm": 0.25440704822540283, "learning_rate": 2.5913283869959864e-05, "loss": 0.6706252098083496, "step": 8295 }, { "epoch": 10.179141104294478, "grad_norm": 0.23941117525100708, "learning_rate": 2.590826804179823e-05, "loss": 0.6608953475952148, "step": 8296 }, { "epoch": 10.180368098159509, "grad_norm": 0.2665828764438629, "learning_rate": 2.59032521770269e-05, "loss": 0.6494262218475342, "step": 8297 }, { "epoch": 10.18159509202454, "grad_norm": 0.30647212266921997, "learning_rate": 2.5898236275848047e-05, "loss": 0.6096402406692505, "step": 8298 }, { "epoch": 10.18282208588957, "grad_norm": 0.273303747177124, "learning_rate": 2.5893220338463852e-05, "loss": 0.5828864574432373, "step": 8299 }, { "epoch": 10.184049079754601, "grad_norm": 0.29609590768814087, "learning_rate": 2.58882043650765e-05, "loss": 0.6918210387229919, "step": 8300 }, { "epoch": 10.185276073619631, "grad_norm": 0.2282705157995224, "learning_rate": 2.5883188355888154e-05, "loss": 0.7179142236709595, "step": 8301 }, { "epoch": 10.186503067484663, "grad_norm": 0.32012394070625305, "learning_rate": 2.5878172311101008e-05, "loss": 0.5682701468467712, "step": 8302 }, { "epoch": 10.187730061349694, "grad_norm": 0.32618531584739685, "learning_rate": 2.5873156230917246e-05, "loss": 0.7457494735717773, "step": 8303 }, { "epoch": 10.188957055214724, "grad_norm": 0.29173868894577026, "learning_rate": 2.586814011553904e-05, "loss": 0.7070698142051697, "step": 8304 }, { "epoch": 10.190184049079754, "grad_norm": 0.3044675886631012, "learning_rate": 2.5863123965168584e-05, "loss": 0.6622821092605591, "step": 8305 }, { "epoch": 10.191411042944786, "grad_norm": 0.26608118414878845, "learning_rate": 2.5858107780008062e-05, "loss": 0.6083918809890747, "step": 8306 }, { "epoch": 10.192638036809816, "grad_norm": 0.22857466340065002, "learning_rate": 2.5853091560259667e-05, "loss": 0.7191613912582397, "step": 8307 }, { "epoch": 10.193865030674846, "grad_norm": 0.21695400774478912, "learning_rate": 2.584807530612558e-05, "loss": 0.6878112554550171, "step": 8308 }, { "epoch": 10.195092024539877, "grad_norm": 0.26190412044525146, "learning_rate": 2.584305901780799e-05, "loss": 0.6287902593612671, "step": 8309 }, { "epoch": 10.196319018404909, "grad_norm": 0.2675197422504425, "learning_rate": 2.5838042695509108e-05, "loss": 0.6125367879867554, "step": 8310 }, { "epoch": 10.197546012269939, "grad_norm": 0.269765168428421, "learning_rate": 2.583302633943111e-05, "loss": 0.5385411381721497, "step": 8311 }, { "epoch": 10.198773006134969, "grad_norm": 0.2904207408428192, "learning_rate": 2.58280099497762e-05, "loss": 0.6449041366577148, "step": 8312 }, { "epoch": 10.2, "grad_norm": 0.2786148190498352, "learning_rate": 2.5822993526746565e-05, "loss": 0.7457606792449951, "step": 8313 }, { "epoch": 10.201226993865031, "grad_norm": 0.28005924820899963, "learning_rate": 2.5817977070544407e-05, "loss": 0.8169186115264893, "step": 8314 }, { "epoch": 10.202453987730062, "grad_norm": 0.23778730630874634, "learning_rate": 2.581296058137193e-05, "loss": 0.7178627252578735, "step": 8315 }, { "epoch": 10.203680981595092, "grad_norm": 0.27676811814308167, "learning_rate": 2.5807944059431322e-05, "loss": 0.5801066160202026, "step": 8316 }, { "epoch": 10.204907975460122, "grad_norm": 0.2596217095851898, "learning_rate": 2.5802927504924807e-05, "loss": 0.5877034664154053, "step": 8317 }, { "epoch": 10.206134969325154, "grad_norm": 0.31330040097236633, "learning_rate": 2.5797910918054562e-05, "loss": 0.5843008756637573, "step": 8318 }, { "epoch": 10.207361963190184, "grad_norm": 0.2742171883583069, "learning_rate": 2.5792894299022812e-05, "loss": 0.45885491371154785, "step": 8319 }, { "epoch": 10.208588957055214, "grad_norm": 0.2906065583229065, "learning_rate": 2.5787877648031756e-05, "loss": 0.6262652277946472, "step": 8320 }, { "epoch": 10.209815950920245, "grad_norm": 0.2692144513130188, "learning_rate": 2.578286096528359e-05, "loss": 0.7044481039047241, "step": 8321 }, { "epoch": 10.211042944785277, "grad_norm": 0.29658272862434387, "learning_rate": 2.5777844250980537e-05, "loss": 0.6679168343544006, "step": 8322 }, { "epoch": 10.212269938650307, "grad_norm": 0.2599563002586365, "learning_rate": 2.5772827505324805e-05, "loss": 0.6973172426223755, "step": 8323 }, { "epoch": 10.213496932515337, "grad_norm": 0.3744294047355652, "learning_rate": 2.5767810728518592e-05, "loss": 0.5978968143463135, "step": 8324 }, { "epoch": 10.214723926380367, "grad_norm": 0.2393745332956314, "learning_rate": 2.5762793920764124e-05, "loss": 0.565445601940155, "step": 8325 }, { "epoch": 10.2159509202454, "grad_norm": 0.29957282543182373, "learning_rate": 2.5757777082263602e-05, "loss": 0.636104166507721, "step": 8326 }, { "epoch": 10.21717791411043, "grad_norm": 0.22733287513256073, "learning_rate": 2.5752760213219253e-05, "loss": 0.5171756148338318, "step": 8327 }, { "epoch": 10.21840490797546, "grad_norm": 0.2715461552143097, "learning_rate": 2.5747743313833285e-05, "loss": 0.7367927432060242, "step": 8328 }, { "epoch": 10.21963190184049, "grad_norm": 0.26770853996276855, "learning_rate": 2.5742726384307913e-05, "loss": 0.45828181505203247, "step": 8329 }, { "epoch": 10.220858895705522, "grad_norm": 0.250907301902771, "learning_rate": 2.5737709424845364e-05, "loss": 0.6578851938247681, "step": 8330 }, { "epoch": 10.222085889570552, "grad_norm": 0.2444201558828354, "learning_rate": 2.5732692435647852e-05, "loss": 0.6597899198532104, "step": 8331 }, { "epoch": 10.223312883435582, "grad_norm": 0.21170251071453094, "learning_rate": 2.5727675416917595e-05, "loss": 0.7833452224731445, "step": 8332 }, { "epoch": 10.224539877300613, "grad_norm": 0.23125050961971283, "learning_rate": 2.5722658368856816e-05, "loss": 0.5099759697914124, "step": 8333 }, { "epoch": 10.225766871165645, "grad_norm": 0.25140151381492615, "learning_rate": 2.571764129166775e-05, "loss": 0.6209156513214111, "step": 8334 }, { "epoch": 10.226993865030675, "grad_norm": 0.2771191895008087, "learning_rate": 2.5712624185552597e-05, "loss": 0.8555119037628174, "step": 8335 }, { "epoch": 10.228220858895705, "grad_norm": 0.2599078416824341, "learning_rate": 2.5707607050713604e-05, "loss": 0.5977950692176819, "step": 8336 }, { "epoch": 10.229447852760735, "grad_norm": 0.26911646127700806, "learning_rate": 2.5702589887352986e-05, "loss": 0.6866577863693237, "step": 8337 }, { "epoch": 10.230674846625767, "grad_norm": 0.29872316122055054, "learning_rate": 2.5697572695672974e-05, "loss": 0.6659030318260193, "step": 8338 }, { "epoch": 10.231901840490798, "grad_norm": 0.30611783266067505, "learning_rate": 2.569255547587579e-05, "loss": 0.5422796010971069, "step": 8339 }, { "epoch": 10.233128834355828, "grad_norm": 0.26473817229270935, "learning_rate": 2.568753822816367e-05, "loss": 0.7511699199676514, "step": 8340 }, { "epoch": 10.23435582822086, "grad_norm": 0.26082301139831543, "learning_rate": 2.568252095273886e-05, "loss": 0.6095864772796631, "step": 8341 }, { "epoch": 10.23558282208589, "grad_norm": 0.3052963614463806, "learning_rate": 2.5677503649803563e-05, "loss": 0.5821636915206909, "step": 8342 }, { "epoch": 10.23680981595092, "grad_norm": 0.22262528538703918, "learning_rate": 2.5672486319560036e-05, "loss": 0.5667736530303955, "step": 8343 }, { "epoch": 10.23803680981595, "grad_norm": 0.24532143771648407, "learning_rate": 2.5667468962210495e-05, "loss": 0.6260351538658142, "step": 8344 }, { "epoch": 10.239263803680982, "grad_norm": 0.32159069180488586, "learning_rate": 2.566245157795718e-05, "loss": 0.5135701298713684, "step": 8345 }, { "epoch": 10.240490797546013, "grad_norm": 0.27355337142944336, "learning_rate": 2.5657434167002343e-05, "loss": 0.5809930562973022, "step": 8346 }, { "epoch": 10.241717791411043, "grad_norm": 0.2673709988594055, "learning_rate": 2.5652416729548206e-05, "loss": 0.5520628690719604, "step": 8347 }, { "epoch": 10.242944785276073, "grad_norm": 0.29340043663978577, "learning_rate": 2.5647399265797006e-05, "loss": 0.6456459760665894, "step": 8348 }, { "epoch": 10.244171779141105, "grad_norm": 0.2884678244590759, "learning_rate": 2.5642381775950998e-05, "loss": 0.7493658065795898, "step": 8349 }, { "epoch": 10.245398773006135, "grad_norm": 0.311115562915802, "learning_rate": 2.56373642602124e-05, "loss": 0.2257729470729828, "step": 8350 }, { "epoch": 10.246625766871166, "grad_norm": 0.28421682119369507, "learning_rate": 2.5632346718783473e-05, "loss": 0.5154801607131958, "step": 8351 }, { "epoch": 10.247852760736196, "grad_norm": 0.2945849597454071, "learning_rate": 2.5627329151866452e-05, "loss": 0.6007100939750671, "step": 8352 }, { "epoch": 10.249079754601228, "grad_norm": 0.2795694172382355, "learning_rate": 2.5622311559663593e-05, "loss": 0.7148411273956299, "step": 8353 }, { "epoch": 10.250306748466258, "grad_norm": 0.31845346093177795, "learning_rate": 2.5617293942377114e-05, "loss": 0.47604066133499146, "step": 8354 }, { "epoch": 10.251533742331288, "grad_norm": 0.20789776742458344, "learning_rate": 2.5612276300209283e-05, "loss": 0.7057470083236694, "step": 8355 }, { "epoch": 10.252760736196318, "grad_norm": 0.26632562279701233, "learning_rate": 2.5607258633362347e-05, "loss": 0.6316274404525757, "step": 8356 }, { "epoch": 10.25398773006135, "grad_norm": 0.2812019884586334, "learning_rate": 2.5602240942038534e-05, "loss": 0.5365169048309326, "step": 8357 }, { "epoch": 10.25521472392638, "grad_norm": 0.24903228878974915, "learning_rate": 2.559722322644012e-05, "loss": 0.6682829260826111, "step": 8358 }, { "epoch": 10.256441717791411, "grad_norm": 0.26077714562416077, "learning_rate": 2.5592205486769332e-05, "loss": 0.5881049633026123, "step": 8359 }, { "epoch": 10.257668711656441, "grad_norm": 0.23551534116268158, "learning_rate": 2.5587187723228436e-05, "loss": 0.7051653861999512, "step": 8360 }, { "epoch": 10.258895705521473, "grad_norm": 0.26419195532798767, "learning_rate": 2.5582169936019666e-05, "loss": 0.5742355585098267, "step": 8361 }, { "epoch": 10.260122699386503, "grad_norm": 0.26452621817588806, "learning_rate": 2.5577152125345293e-05, "loss": 0.6418535709381104, "step": 8362 }, { "epoch": 10.261349693251534, "grad_norm": 0.26799899339675903, "learning_rate": 2.5572134291407572e-05, "loss": 0.5634461641311646, "step": 8363 }, { "epoch": 10.262576687116564, "grad_norm": 0.2562037706375122, "learning_rate": 2.556711643440874e-05, "loss": 0.675150990486145, "step": 8364 }, { "epoch": 10.263803680981596, "grad_norm": 0.327752947807312, "learning_rate": 2.5562098554551066e-05, "loss": 0.4195133447647095, "step": 8365 }, { "epoch": 10.265030674846626, "grad_norm": 0.2637660503387451, "learning_rate": 2.5557080652036803e-05, "loss": 0.6598120331764221, "step": 8366 }, { "epoch": 10.266257668711656, "grad_norm": 0.31163057684898376, "learning_rate": 2.5552062727068204e-05, "loss": 0.4600411355495453, "step": 8367 }, { "epoch": 10.267484662576686, "grad_norm": 0.2765330970287323, "learning_rate": 2.5547044779847536e-05, "loss": 0.6065881848335266, "step": 8368 }, { "epoch": 10.268711656441718, "grad_norm": 0.24168555438518524, "learning_rate": 2.5542026810577047e-05, "loss": 0.7706277370452881, "step": 8369 }, { "epoch": 10.269938650306749, "grad_norm": 0.24381932616233826, "learning_rate": 2.5537008819459014e-05, "loss": 0.4374602437019348, "step": 8370 }, { "epoch": 10.271165644171779, "grad_norm": 0.22023442387580872, "learning_rate": 2.5531990806695678e-05, "loss": 0.8089134097099304, "step": 8371 }, { "epoch": 10.27239263803681, "grad_norm": 0.30228525400161743, "learning_rate": 2.5526972772489306e-05, "loss": 0.6030732989311218, "step": 8372 }, { "epoch": 10.273619631901841, "grad_norm": 0.25033891201019287, "learning_rate": 2.5521954717042167e-05, "loss": 0.6281189918518066, "step": 8373 }, { "epoch": 10.274846625766871, "grad_norm": 0.2550143599510193, "learning_rate": 2.5516936640556528e-05, "loss": 0.6769936084747314, "step": 8374 }, { "epoch": 10.276073619631902, "grad_norm": 0.25306153297424316, "learning_rate": 2.5511918543234648e-05, "loss": 0.7542964816093445, "step": 8375 }, { "epoch": 10.277300613496932, "grad_norm": 0.3021625578403473, "learning_rate": 2.5506900425278785e-05, "loss": 0.459473192691803, "step": 8376 }, { "epoch": 10.278527607361964, "grad_norm": 0.31072482466697693, "learning_rate": 2.5501882286891216e-05, "loss": 0.666473388671875, "step": 8377 }, { "epoch": 10.279754601226994, "grad_norm": 0.32172641158103943, "learning_rate": 2.54968641282742e-05, "loss": 0.6917443871498108, "step": 8378 }, { "epoch": 10.280981595092024, "grad_norm": 0.3294048309326172, "learning_rate": 2.549184594963001e-05, "loss": 0.5710697174072266, "step": 8379 }, { "epoch": 10.282208588957054, "grad_norm": 0.30031904578208923, "learning_rate": 2.5486827751160912e-05, "loss": 0.6611201763153076, "step": 8380 }, { "epoch": 10.283435582822086, "grad_norm": 0.27655717730522156, "learning_rate": 2.548180953306918e-05, "loss": 0.6813526153564453, "step": 8381 }, { "epoch": 10.284662576687117, "grad_norm": 0.4039050340652466, "learning_rate": 2.547679129555708e-05, "loss": 0.692043662071228, "step": 8382 }, { "epoch": 10.285889570552147, "grad_norm": 0.2677185833454132, "learning_rate": 2.5471773038826867e-05, "loss": 0.823128342628479, "step": 8383 }, { "epoch": 10.287116564417177, "grad_norm": 0.3161236345767975, "learning_rate": 2.5466754763080848e-05, "loss": 0.588365912437439, "step": 8384 }, { "epoch": 10.28834355828221, "grad_norm": 0.2609193027019501, "learning_rate": 2.5461736468521264e-05, "loss": 0.7205974459648132, "step": 8385 }, { "epoch": 10.28957055214724, "grad_norm": 0.2974373996257782, "learning_rate": 2.5456718155350402e-05, "loss": 0.5408053398132324, "step": 8386 }, { "epoch": 10.29079754601227, "grad_norm": 0.2590084969997406, "learning_rate": 2.5451699823770542e-05, "loss": 0.5121535062789917, "step": 8387 }, { "epoch": 10.2920245398773, "grad_norm": 0.23854881525039673, "learning_rate": 2.544668147398394e-05, "loss": 0.7124186754226685, "step": 8388 }, { "epoch": 10.293251533742332, "grad_norm": 0.25999489426612854, "learning_rate": 2.5441663106192886e-05, "loss": 0.6822336316108704, "step": 8389 }, { "epoch": 10.294478527607362, "grad_norm": 0.2445707470178604, "learning_rate": 2.543664472059965e-05, "loss": 0.698962926864624, "step": 8390 }, { "epoch": 10.295705521472392, "grad_norm": 0.32345736026763916, "learning_rate": 2.5431626317406515e-05, "loss": 0.5603301525115967, "step": 8391 }, { "epoch": 10.296932515337422, "grad_norm": 0.2510673999786377, "learning_rate": 2.542660789681575e-05, "loss": 0.7703052163124084, "step": 8392 }, { "epoch": 10.298159509202454, "grad_norm": 0.29890403151512146, "learning_rate": 2.542158945902964e-05, "loss": 0.7467091679573059, "step": 8393 }, { "epoch": 10.299386503067485, "grad_norm": 0.27873414754867554, "learning_rate": 2.5416571004250456e-05, "loss": 0.6479943990707397, "step": 8394 }, { "epoch": 10.300613496932515, "grad_norm": 0.26450350880622864, "learning_rate": 2.541155253268049e-05, "loss": 0.5085701942443848, "step": 8395 }, { "epoch": 10.301840490797545, "grad_norm": 0.2661471962928772, "learning_rate": 2.540653404452201e-05, "loss": 0.6044854521751404, "step": 8396 }, { "epoch": 10.303067484662577, "grad_norm": 0.30516597628593445, "learning_rate": 2.5401515539977305e-05, "loss": 0.7128307223320007, "step": 8397 }, { "epoch": 10.304294478527607, "grad_norm": 0.2654366195201874, "learning_rate": 2.5396497019248656e-05, "loss": 0.6635530591011047, "step": 8398 }, { "epoch": 10.305521472392638, "grad_norm": 0.28911060094833374, "learning_rate": 2.5391478482538338e-05, "loss": 0.7438907623291016, "step": 8399 }, { "epoch": 10.30674846625767, "grad_norm": 0.24927130341529846, "learning_rate": 2.538645993004864e-05, "loss": 0.876227617263794, "step": 8400 }, { "epoch": 10.3079754601227, "grad_norm": 0.25746262073516846, "learning_rate": 2.5381441361981844e-05, "loss": 0.6339426040649414, "step": 8401 }, { "epoch": 10.30920245398773, "grad_norm": 0.2705438733100891, "learning_rate": 2.5376422778540244e-05, "loss": 0.7370259761810303, "step": 8402 }, { "epoch": 10.31042944785276, "grad_norm": 0.2601880431175232, "learning_rate": 2.53714041799261e-05, "loss": 0.6315937042236328, "step": 8403 }, { "epoch": 10.31165644171779, "grad_norm": 0.2655937373638153, "learning_rate": 2.5366385566341726e-05, "loss": 0.5922069549560547, "step": 8404 }, { "epoch": 10.312883435582823, "grad_norm": 0.31745484471321106, "learning_rate": 2.5361366937989383e-05, "loss": 0.8480597138404846, "step": 8405 }, { "epoch": 10.314110429447853, "grad_norm": 0.2354857623577118, "learning_rate": 2.535634829507138e-05, "loss": 0.6044259667396545, "step": 8406 }, { "epoch": 10.315337423312883, "grad_norm": 0.3088266849517822, "learning_rate": 2.5351329637789983e-05, "loss": 0.5870583057403564, "step": 8407 }, { "epoch": 10.316564417177915, "grad_norm": 0.23263591527938843, "learning_rate": 2.5346310966347498e-05, "loss": 0.5358743667602539, "step": 8408 }, { "epoch": 10.317791411042945, "grad_norm": 0.24357926845550537, "learning_rate": 2.5341292280946204e-05, "loss": 0.6543775796890259, "step": 8409 }, { "epoch": 10.319018404907975, "grad_norm": 0.2931361794471741, "learning_rate": 2.5336273581788394e-05, "loss": 0.44926315546035767, "step": 8410 }, { "epoch": 10.320245398773006, "grad_norm": 0.25721436738967896, "learning_rate": 2.5331254869076353e-05, "loss": 0.4723482131958008, "step": 8411 }, { "epoch": 10.321472392638038, "grad_norm": 0.26341724395751953, "learning_rate": 2.5326236143012372e-05, "loss": 0.7555718421936035, "step": 8412 }, { "epoch": 10.322699386503068, "grad_norm": 0.262838751077652, "learning_rate": 2.532121740379874e-05, "loss": 0.8217714428901672, "step": 8413 }, { "epoch": 10.323926380368098, "grad_norm": 0.29831963777542114, "learning_rate": 2.531619865163776e-05, "loss": 0.6319045424461365, "step": 8414 }, { "epoch": 10.325153374233128, "grad_norm": 0.22365306317806244, "learning_rate": 2.5311179886731706e-05, "loss": 0.6547838449478149, "step": 8415 }, { "epoch": 10.32638036809816, "grad_norm": 0.2378627061843872, "learning_rate": 2.5306161109282878e-05, "loss": 0.7734255790710449, "step": 8416 }, { "epoch": 10.32760736196319, "grad_norm": 0.2623487412929535, "learning_rate": 2.530114231949357e-05, "loss": 0.6970717906951904, "step": 8417 }, { "epoch": 10.32883435582822, "grad_norm": 0.2437349408864975, "learning_rate": 2.529612351756608e-05, "loss": 0.6002101898193359, "step": 8418 }, { "epoch": 10.330061349693251, "grad_norm": 0.27766868472099304, "learning_rate": 2.5291104703702684e-05, "loss": 0.663573145866394, "step": 8419 }, { "epoch": 10.331288343558283, "grad_norm": 0.2977846562862396, "learning_rate": 2.528608587810569e-05, "loss": 0.6098721027374268, "step": 8420 }, { "epoch": 10.332515337423313, "grad_norm": 0.24588662385940552, "learning_rate": 2.52810670409774e-05, "loss": 0.5733028650283813, "step": 8421 }, { "epoch": 10.333742331288343, "grad_norm": 0.2735345661640167, "learning_rate": 2.5276048192520097e-05, "loss": 0.7309633493423462, "step": 8422 }, { "epoch": 10.334969325153374, "grad_norm": 0.27197325229644775, "learning_rate": 2.5271029332936073e-05, "loss": 0.8037132024765015, "step": 8423 }, { "epoch": 10.336196319018406, "grad_norm": 0.28919485211372375, "learning_rate": 2.5266010462427635e-05, "loss": 0.6477870941162109, "step": 8424 }, { "epoch": 10.337423312883436, "grad_norm": 0.3073555529117584, "learning_rate": 2.5260991581197067e-05, "loss": 0.5181758403778076, "step": 8425 }, { "epoch": 10.338650306748466, "grad_norm": 0.23759996891021729, "learning_rate": 2.5255972689446682e-05, "loss": 0.7629985809326172, "step": 8426 }, { "epoch": 10.339877300613496, "grad_norm": 0.29577821493148804, "learning_rate": 2.5250953787378755e-05, "loss": 0.6280152797698975, "step": 8427 }, { "epoch": 10.341104294478528, "grad_norm": 0.3106782138347626, "learning_rate": 2.5245934875195608e-05, "loss": 0.8206924200057983, "step": 8428 }, { "epoch": 10.342331288343559, "grad_norm": 0.3133496046066284, "learning_rate": 2.524091595309952e-05, "loss": 0.6371510624885559, "step": 8429 }, { "epoch": 10.343558282208589, "grad_norm": 0.2553952634334564, "learning_rate": 2.5235897021292804e-05, "loss": 0.7471730709075928, "step": 8430 }, { "epoch": 10.344785276073619, "grad_norm": 0.37641626596450806, "learning_rate": 2.5230878079977748e-05, "loss": 0.4157148599624634, "step": 8431 }, { "epoch": 10.346012269938651, "grad_norm": 0.2999502718448639, "learning_rate": 2.5225859129356656e-05, "loss": 0.7318642139434814, "step": 8432 }, { "epoch": 10.347239263803681, "grad_norm": 0.24811366200447083, "learning_rate": 2.5220840169631826e-05, "loss": 0.7682701349258423, "step": 8433 }, { "epoch": 10.348466257668711, "grad_norm": 0.33765822649002075, "learning_rate": 2.5215821201005558e-05, "loss": 0.745922327041626, "step": 8434 }, { "epoch": 10.349693251533742, "grad_norm": 0.25514063239097595, "learning_rate": 2.521080222368016e-05, "loss": 0.5008875131607056, "step": 8435 }, { "epoch": 10.350920245398774, "grad_norm": 0.2585175335407257, "learning_rate": 2.5205783237857916e-05, "loss": 0.6687482595443726, "step": 8436 }, { "epoch": 10.352147239263804, "grad_norm": 0.22361578047275543, "learning_rate": 2.5200764243741136e-05, "loss": 0.735835075378418, "step": 8437 }, { "epoch": 10.353374233128834, "grad_norm": 0.25329703092575073, "learning_rate": 2.5195745241532132e-05, "loss": 0.8566901683807373, "step": 8438 }, { "epoch": 10.354601226993864, "grad_norm": 0.35301488637924194, "learning_rate": 2.5190726231433183e-05, "loss": 0.5333905220031738, "step": 8439 }, { "epoch": 10.355828220858896, "grad_norm": 0.29309993982315063, "learning_rate": 2.5185707213646613e-05, "loss": 0.33392274379730225, "step": 8440 }, { "epoch": 10.357055214723927, "grad_norm": 0.23624619841575623, "learning_rate": 2.518068818837471e-05, "loss": 0.6426944732666016, "step": 8441 }, { "epoch": 10.358282208588957, "grad_norm": 0.2815571129322052, "learning_rate": 2.5175669155819787e-05, "loss": 0.63181471824646, "step": 8442 }, { "epoch": 10.359509202453987, "grad_norm": 0.2580769658088684, "learning_rate": 2.5170650116184134e-05, "loss": 0.6070098876953125, "step": 8443 }, { "epoch": 10.360736196319019, "grad_norm": 0.34412091970443726, "learning_rate": 2.5165631069670066e-05, "loss": 0.460052490234375, "step": 8444 }, { "epoch": 10.36196319018405, "grad_norm": 0.2487204521894455, "learning_rate": 2.5160612016479884e-05, "loss": 0.712119460105896, "step": 8445 }, { "epoch": 10.36319018404908, "grad_norm": 0.2913006544113159, "learning_rate": 2.5155592956815886e-05, "loss": 0.545707643032074, "step": 8446 }, { "epoch": 10.36441717791411, "grad_norm": 0.2851625382900238, "learning_rate": 2.5150573890880386e-05, "loss": 0.4590756893157959, "step": 8447 }, { "epoch": 10.365644171779142, "grad_norm": 0.31903257966041565, "learning_rate": 2.514555481887567e-05, "loss": 0.6660257577896118, "step": 8448 }, { "epoch": 10.366871165644172, "grad_norm": 0.3192739188671112, "learning_rate": 2.5140535741004063e-05, "loss": 0.47987398505210876, "step": 8449 }, { "epoch": 10.368098159509202, "grad_norm": 0.2788856327533722, "learning_rate": 2.5135516657467855e-05, "loss": 0.6368774771690369, "step": 8450 }, { "epoch": 10.369325153374232, "grad_norm": 0.2868781089782715, "learning_rate": 2.5130497568469362e-05, "loss": 0.482793390750885, "step": 8451 }, { "epoch": 10.370552147239264, "grad_norm": 0.2979125678539276, "learning_rate": 2.512547847421089e-05, "loss": 0.49210479855537415, "step": 8452 }, { "epoch": 10.371779141104295, "grad_norm": 0.2555094361305237, "learning_rate": 2.5120459374894728e-05, "loss": 0.7742398381233215, "step": 8453 }, { "epoch": 10.373006134969325, "grad_norm": 0.3057664632797241, "learning_rate": 2.5115440270723195e-05, "loss": 0.5453558564186096, "step": 8454 }, { "epoch": 10.374233128834355, "grad_norm": 0.2526654899120331, "learning_rate": 2.5110421161898595e-05, "loss": 0.8135901689529419, "step": 8455 }, { "epoch": 10.375460122699387, "grad_norm": 0.33453816175460815, "learning_rate": 2.5105402048623232e-05, "loss": 0.32088881731033325, "step": 8456 }, { "epoch": 10.376687116564417, "grad_norm": 0.31482720375061035, "learning_rate": 2.510038293109942e-05, "loss": 0.6552755832672119, "step": 8457 }, { "epoch": 10.377914110429447, "grad_norm": 0.2842562198638916, "learning_rate": 2.5095363809529448e-05, "loss": 0.718262791633606, "step": 8458 }, { "epoch": 10.379141104294478, "grad_norm": 0.2719147801399231, "learning_rate": 2.5090344684115636e-05, "loss": 0.46309924125671387, "step": 8459 }, { "epoch": 10.38036809815951, "grad_norm": 0.268222838640213, "learning_rate": 2.508532555506029e-05, "loss": 0.6445283889770508, "step": 8460 }, { "epoch": 10.38159509202454, "grad_norm": 0.2663191854953766, "learning_rate": 2.5080306422565707e-05, "loss": 0.3465173542499542, "step": 8461 }, { "epoch": 10.38282208588957, "grad_norm": 0.2943006753921509, "learning_rate": 2.5075287286834214e-05, "loss": 0.7094184160232544, "step": 8462 }, { "epoch": 10.3840490797546, "grad_norm": 0.2699417173862457, "learning_rate": 2.5070268148068093e-05, "loss": 0.574294924736023, "step": 8463 }, { "epoch": 10.385276073619632, "grad_norm": 0.22396419942378998, "learning_rate": 2.5065249006469677e-05, "loss": 0.6711708307266235, "step": 8464 }, { "epoch": 10.386503067484663, "grad_norm": 0.29975318908691406, "learning_rate": 2.5060229862241248e-05, "loss": 0.641978919506073, "step": 8465 }, { "epoch": 10.387730061349693, "grad_norm": 0.2790152430534363, "learning_rate": 2.5055210715585127e-05, "loss": 0.5885699987411499, "step": 8466 }, { "epoch": 10.388957055214725, "grad_norm": 0.24954970180988312, "learning_rate": 2.5050191566703624e-05, "loss": 0.800916850566864, "step": 8467 }, { "epoch": 10.390184049079755, "grad_norm": 0.4020659625530243, "learning_rate": 2.5045172415799033e-05, "loss": 0.4303419589996338, "step": 8468 }, { "epoch": 10.391411042944785, "grad_norm": 0.2738519012928009, "learning_rate": 2.5040153263073684e-05, "loss": 0.7752941250801086, "step": 8469 }, { "epoch": 10.392638036809815, "grad_norm": 0.2686012089252472, "learning_rate": 2.5035134108729862e-05, "loss": 0.6878417730331421, "step": 8470 }, { "epoch": 10.393865030674847, "grad_norm": 0.3636164665222168, "learning_rate": 2.5030114952969885e-05, "loss": 0.5316140651702881, "step": 8471 }, { "epoch": 10.395092024539878, "grad_norm": 0.2821882665157318, "learning_rate": 2.5025095795996056e-05, "loss": 0.6622164249420166, "step": 8472 }, { "epoch": 10.396319018404908, "grad_norm": 0.3006989657878876, "learning_rate": 2.5020076638010693e-05, "loss": 0.6130434274673462, "step": 8473 }, { "epoch": 10.397546012269938, "grad_norm": 0.3193504512310028, "learning_rate": 2.5015057479216103e-05, "loss": 0.5832382440567017, "step": 8474 }, { "epoch": 10.39877300613497, "grad_norm": 0.274122029542923, "learning_rate": 2.5010038319814582e-05, "loss": 0.6329336762428284, "step": 8475 }, { "epoch": 10.4, "grad_norm": 0.27365046739578247, "learning_rate": 2.5005019160008448e-05, "loss": 0.7003805637359619, "step": 8476 }, { "epoch": 10.40122699386503, "grad_norm": 0.2992973029613495, "learning_rate": 2.5e-05, "loss": 0.2882940471172333, "step": 8477 }, { "epoch": 10.40245398773006, "grad_norm": 0.26062238216400146, "learning_rate": 2.4994980839991558e-05, "loss": 0.5344786643981934, "step": 8478 }, { "epoch": 10.403680981595093, "grad_norm": 0.25872400403022766, "learning_rate": 2.4989961680185427e-05, "loss": 0.5068426728248596, "step": 8479 }, { "epoch": 10.404907975460123, "grad_norm": 0.3751554489135742, "learning_rate": 2.4984942520783906e-05, "loss": 0.6804594397544861, "step": 8480 }, { "epoch": 10.406134969325153, "grad_norm": 0.2519703507423401, "learning_rate": 2.4979923361989312e-05, "loss": 0.5904736518859863, "step": 8481 }, { "epoch": 10.407361963190183, "grad_norm": 0.27293676137924194, "learning_rate": 2.4974904204003946e-05, "loss": 0.625269889831543, "step": 8482 }, { "epoch": 10.408588957055215, "grad_norm": 0.23451346158981323, "learning_rate": 2.4969885047030124e-05, "loss": 0.5477962493896484, "step": 8483 }, { "epoch": 10.409815950920246, "grad_norm": 0.23691192269325256, "learning_rate": 2.496486589127015e-05, "loss": 0.8283467292785645, "step": 8484 }, { "epoch": 10.411042944785276, "grad_norm": 0.26204758882522583, "learning_rate": 2.495984673692633e-05, "loss": 0.7546080946922302, "step": 8485 }, { "epoch": 10.412269938650306, "grad_norm": 0.2576735317707062, "learning_rate": 2.4954827584200976e-05, "loss": 0.7544941306114197, "step": 8486 }, { "epoch": 10.413496932515338, "grad_norm": 0.3741559684276581, "learning_rate": 2.494980843329639e-05, "loss": 0.6541256904602051, "step": 8487 }, { "epoch": 10.414723926380368, "grad_norm": 0.29659295082092285, "learning_rate": 2.494478928441488e-05, "loss": 0.5631659030914307, "step": 8488 }, { "epoch": 10.415950920245399, "grad_norm": 0.2824704945087433, "learning_rate": 2.4939770137758754e-05, "loss": 0.639650821685791, "step": 8489 }, { "epoch": 10.417177914110429, "grad_norm": 0.26702404022216797, "learning_rate": 2.493475099353033e-05, "loss": 0.4826497435569763, "step": 8490 }, { "epoch": 10.41840490797546, "grad_norm": 0.2626473307609558, "learning_rate": 2.4929731851931906e-05, "loss": 0.5085522532463074, "step": 8491 }, { "epoch": 10.419631901840491, "grad_norm": 0.2656061053276062, "learning_rate": 2.492471271316579e-05, "loss": 0.7473036646842957, "step": 8492 }, { "epoch": 10.420858895705521, "grad_norm": 0.2653738856315613, "learning_rate": 2.4919693577434292e-05, "loss": 0.6875101327896118, "step": 8493 }, { "epoch": 10.422085889570551, "grad_norm": 0.23611488938331604, "learning_rate": 2.491467444493971e-05, "loss": 0.6623505353927612, "step": 8494 }, { "epoch": 10.423312883435583, "grad_norm": 0.24670670926570892, "learning_rate": 2.4909655315884366e-05, "loss": 0.7554689645767212, "step": 8495 }, { "epoch": 10.424539877300614, "grad_norm": 0.31566742062568665, "learning_rate": 2.4904636190470558e-05, "loss": 0.6869875192642212, "step": 8496 }, { "epoch": 10.425766871165644, "grad_norm": 0.2593576908111572, "learning_rate": 2.4899617068900585e-05, "loss": 0.49033164978027344, "step": 8497 }, { "epoch": 10.426993865030674, "grad_norm": 0.28011181950569153, "learning_rate": 2.4894597951376773e-05, "loss": 0.8088973760604858, "step": 8498 }, { "epoch": 10.428220858895706, "grad_norm": 0.3136923313140869, "learning_rate": 2.4889578838101408e-05, "loss": 0.49651145935058594, "step": 8499 }, { "epoch": 10.429447852760736, "grad_norm": 0.2917376458644867, "learning_rate": 2.4884559729276814e-05, "loss": 0.6602247357368469, "step": 8500 }, { "epoch": 10.430674846625767, "grad_norm": 0.25211018323898315, "learning_rate": 2.4879540625105278e-05, "loss": 0.8159863352775574, "step": 8501 }, { "epoch": 10.431901840490797, "grad_norm": 0.2612776756286621, "learning_rate": 2.487452152578912e-05, "loss": 0.6363832950592041, "step": 8502 }, { "epoch": 10.433128834355829, "grad_norm": 0.2404642254114151, "learning_rate": 2.4869502431530644e-05, "loss": 0.6380456686019897, "step": 8503 }, { "epoch": 10.434355828220859, "grad_norm": 0.30129167437553406, "learning_rate": 2.4864483342532147e-05, "loss": 0.47507965564727783, "step": 8504 }, { "epoch": 10.43558282208589, "grad_norm": 0.23616550862789154, "learning_rate": 2.4859464258995943e-05, "loss": 0.5982967615127563, "step": 8505 }, { "epoch": 10.43680981595092, "grad_norm": 0.2635408639907837, "learning_rate": 2.4854445181124335e-05, "loss": 0.6567823886871338, "step": 8506 }, { "epoch": 10.438036809815952, "grad_norm": 0.300199955701828, "learning_rate": 2.4849426109119627e-05, "loss": 0.6668449640274048, "step": 8507 }, { "epoch": 10.439263803680982, "grad_norm": 0.27936261892318726, "learning_rate": 2.4844407043184123e-05, "loss": 0.671359658241272, "step": 8508 }, { "epoch": 10.440490797546012, "grad_norm": 0.23951439559459686, "learning_rate": 2.4839387983520126e-05, "loss": 0.7047315239906311, "step": 8509 }, { "epoch": 10.441717791411042, "grad_norm": 0.2771929204463959, "learning_rate": 2.483436893032994e-05, "loss": 0.8013664484024048, "step": 8510 }, { "epoch": 10.442944785276074, "grad_norm": 0.24053677916526794, "learning_rate": 2.4829349883815865e-05, "loss": 0.6250376105308533, "step": 8511 }, { "epoch": 10.444171779141104, "grad_norm": 0.3243662118911743, "learning_rate": 2.482433084418022e-05, "loss": 0.5953752398490906, "step": 8512 }, { "epoch": 10.445398773006135, "grad_norm": 0.21811948716640472, "learning_rate": 2.481931181162529e-05, "loss": 0.62542724609375, "step": 8513 }, { "epoch": 10.446625766871165, "grad_norm": 0.2812197208404541, "learning_rate": 2.4814292786353386e-05, "loss": 0.602693498134613, "step": 8514 }, { "epoch": 10.447852760736197, "grad_norm": 0.2840372920036316, "learning_rate": 2.480927376856682e-05, "loss": 0.6323431134223938, "step": 8515 }, { "epoch": 10.449079754601227, "grad_norm": 0.25152379274368286, "learning_rate": 2.4804254758467873e-05, "loss": 0.7822871208190918, "step": 8516 }, { "epoch": 10.450306748466257, "grad_norm": 0.25380027294158936, "learning_rate": 2.4799235756258866e-05, "loss": 0.703712522983551, "step": 8517 }, { "epoch": 10.451533742331288, "grad_norm": 0.27323105931282043, "learning_rate": 2.479421676214209e-05, "loss": 0.8418720960617065, "step": 8518 }, { "epoch": 10.45276073619632, "grad_norm": 0.28064826130867004, "learning_rate": 2.4789197776319847e-05, "loss": 0.7107676267623901, "step": 8519 }, { "epoch": 10.45398773006135, "grad_norm": 0.4551740884780884, "learning_rate": 2.4784178798994445e-05, "loss": 0.5060195922851562, "step": 8520 }, { "epoch": 10.45521472392638, "grad_norm": 0.2309061586856842, "learning_rate": 2.477915983036818e-05, "loss": 0.7454513311386108, "step": 8521 }, { "epoch": 10.45644171779141, "grad_norm": 0.24334049224853516, "learning_rate": 2.477414087064335e-05, "loss": 0.6310754418373108, "step": 8522 }, { "epoch": 10.457668711656442, "grad_norm": 0.22507157921791077, "learning_rate": 2.4769121920022258e-05, "loss": 0.5560670495033264, "step": 8523 }, { "epoch": 10.458895705521472, "grad_norm": 0.2700367867946625, "learning_rate": 2.47641029787072e-05, "loss": 0.5926555395126343, "step": 8524 }, { "epoch": 10.460122699386503, "grad_norm": 0.31534343957901, "learning_rate": 2.4759084046900486e-05, "loss": 0.6416264772415161, "step": 8525 }, { "epoch": 10.461349693251535, "grad_norm": 0.2679463326931, "learning_rate": 2.4754065124804398e-05, "loss": 0.6230063438415527, "step": 8526 }, { "epoch": 10.462576687116565, "grad_norm": 0.36767861247062683, "learning_rate": 2.474904621262125e-05, "loss": 0.5913743376731873, "step": 8527 }, { "epoch": 10.463803680981595, "grad_norm": 0.2849421501159668, "learning_rate": 2.474402731055333e-05, "loss": 0.5398232340812683, "step": 8528 }, { "epoch": 10.465030674846625, "grad_norm": 0.2688078284263611, "learning_rate": 2.473900841880294e-05, "loss": 0.7052228450775146, "step": 8529 }, { "epoch": 10.466257668711656, "grad_norm": 0.2840583920478821, "learning_rate": 2.4733989537572378e-05, "loss": 0.6447737216949463, "step": 8530 }, { "epoch": 10.467484662576688, "grad_norm": 0.27799174189567566, "learning_rate": 2.4728970667063933e-05, "loss": 0.7762763500213623, "step": 8531 }, { "epoch": 10.468711656441718, "grad_norm": 0.35113584995269775, "learning_rate": 2.4723951807479915e-05, "loss": 0.5268165469169617, "step": 8532 }, { "epoch": 10.469938650306748, "grad_norm": 0.38885778188705444, "learning_rate": 2.4718932959022598e-05, "loss": 0.4566587209701538, "step": 8533 }, { "epoch": 10.47116564417178, "grad_norm": 0.31094005703926086, "learning_rate": 2.4713914121894305e-05, "loss": 0.4948391020298004, "step": 8534 }, { "epoch": 10.47239263803681, "grad_norm": 0.2425796538591385, "learning_rate": 2.4708895296297312e-05, "loss": 0.8546991348266602, "step": 8535 }, { "epoch": 10.47361963190184, "grad_norm": 0.2610064148902893, "learning_rate": 2.4703876482433927e-05, "loss": 0.45325136184692383, "step": 8536 }, { "epoch": 10.47484662576687, "grad_norm": 0.25991523265838623, "learning_rate": 2.4698857680506433e-05, "loss": 0.6506160497665405, "step": 8537 }, { "epoch": 10.476073619631903, "grad_norm": 0.2889278829097748, "learning_rate": 2.469383889071712e-05, "loss": 0.7259328365325928, "step": 8538 }, { "epoch": 10.477300613496933, "grad_norm": 0.25942933559417725, "learning_rate": 2.46888201132683e-05, "loss": 0.8054681420326233, "step": 8539 }, { "epoch": 10.478527607361963, "grad_norm": 0.2821855843067169, "learning_rate": 2.4683801348362244e-05, "loss": 0.45488855242729187, "step": 8540 }, { "epoch": 10.479754601226993, "grad_norm": 0.33769968152046204, "learning_rate": 2.4678782596201263e-05, "loss": 0.7407034039497375, "step": 8541 }, { "epoch": 10.480981595092025, "grad_norm": 0.2874602675437927, "learning_rate": 2.4673763856987634e-05, "loss": 0.5023269653320312, "step": 8542 }, { "epoch": 10.482208588957056, "grad_norm": 0.3216334283351898, "learning_rate": 2.466874513092365e-05, "loss": 0.70367431640625, "step": 8543 }, { "epoch": 10.483435582822086, "grad_norm": 0.2894618809223175, "learning_rate": 2.4663726418211615e-05, "loss": 0.7720874547958374, "step": 8544 }, { "epoch": 10.484662576687116, "grad_norm": 0.2929539680480957, "learning_rate": 2.4658707719053798e-05, "loss": 0.6468245387077332, "step": 8545 }, { "epoch": 10.485889570552148, "grad_norm": 0.24216428399085999, "learning_rate": 2.4653689033652508e-05, "loss": 0.7033772468566895, "step": 8546 }, { "epoch": 10.487116564417178, "grad_norm": 0.30830660462379456, "learning_rate": 2.464867036221002e-05, "loss": 0.8828683495521545, "step": 8547 }, { "epoch": 10.488343558282208, "grad_norm": 0.31650322675704956, "learning_rate": 2.4643651704928627e-05, "loss": 0.5438829064369202, "step": 8548 }, { "epoch": 10.489570552147239, "grad_norm": 0.2599862813949585, "learning_rate": 2.463863306201062e-05, "loss": 0.5431233644485474, "step": 8549 }, { "epoch": 10.49079754601227, "grad_norm": 0.3415778875350952, "learning_rate": 2.4633614433658283e-05, "loss": 0.48096132278442383, "step": 8550 }, { "epoch": 10.4920245398773, "grad_norm": 0.2468360811471939, "learning_rate": 2.4628595820073907e-05, "loss": 0.7266697883605957, "step": 8551 }, { "epoch": 10.493251533742331, "grad_norm": 0.24442942440509796, "learning_rate": 2.462357722145977e-05, "loss": 0.742094874382019, "step": 8552 }, { "epoch": 10.494478527607361, "grad_norm": 0.27733156085014343, "learning_rate": 2.461855863801816e-05, "loss": 0.678604006767273, "step": 8553 }, { "epoch": 10.495705521472393, "grad_norm": 0.37023353576660156, "learning_rate": 2.461354006995137e-05, "loss": 0.3968826234340668, "step": 8554 }, { "epoch": 10.496932515337424, "grad_norm": 0.25882574915885925, "learning_rate": 2.4608521517461668e-05, "loss": 0.5796910524368286, "step": 8555 }, { "epoch": 10.498159509202454, "grad_norm": 0.26211684942245483, "learning_rate": 2.4603502980751346e-05, "loss": 0.6354279518127441, "step": 8556 }, { "epoch": 10.499386503067484, "grad_norm": 0.2541213929653168, "learning_rate": 2.4598484460022694e-05, "loss": 0.8454798460006714, "step": 8557 }, { "epoch": 10.500613496932516, "grad_norm": 0.2511369287967682, "learning_rate": 2.459346595547799e-05, "loss": 0.5377091765403748, "step": 8558 }, { "epoch": 10.501840490797546, "grad_norm": 0.23461106419563293, "learning_rate": 2.458844746731951e-05, "loss": 0.7262780666351318, "step": 8559 }, { "epoch": 10.503067484662576, "grad_norm": 0.2950054705142975, "learning_rate": 2.458342899574954e-05, "loss": 0.7428748607635498, "step": 8560 }, { "epoch": 10.504294478527607, "grad_norm": 0.24314886331558228, "learning_rate": 2.4578410540970364e-05, "loss": 0.7144368886947632, "step": 8561 }, { "epoch": 10.505521472392639, "grad_norm": 0.22648966312408447, "learning_rate": 2.4573392103184255e-05, "loss": 0.805681049823761, "step": 8562 }, { "epoch": 10.506748466257669, "grad_norm": 0.2480098009109497, "learning_rate": 2.4568373682593494e-05, "loss": 0.7895098924636841, "step": 8563 }, { "epoch": 10.5079754601227, "grad_norm": 0.34710264205932617, "learning_rate": 2.4563355279400353e-05, "loss": 0.6796207427978516, "step": 8564 }, { "epoch": 10.50920245398773, "grad_norm": 0.29179254174232483, "learning_rate": 2.4558336893807116e-05, "loss": 0.4756520986557007, "step": 8565 }, { "epoch": 10.510429447852761, "grad_norm": 0.2797548472881317, "learning_rate": 2.4553318526016066e-05, "loss": 0.5540924072265625, "step": 8566 }, { "epoch": 10.511656441717792, "grad_norm": 0.28098228573799133, "learning_rate": 2.4548300176229467e-05, "loss": 0.811314046382904, "step": 8567 }, { "epoch": 10.512883435582822, "grad_norm": 0.23905692994594574, "learning_rate": 2.4543281844649604e-05, "loss": 0.7325335741043091, "step": 8568 }, { "epoch": 10.514110429447852, "grad_norm": 0.3547450602054596, "learning_rate": 2.453826353147874e-05, "loss": 0.488200306892395, "step": 8569 }, { "epoch": 10.515337423312884, "grad_norm": 0.31931135058403015, "learning_rate": 2.453324523691916e-05, "loss": 0.5469461679458618, "step": 8570 }, { "epoch": 10.516564417177914, "grad_norm": 0.3011436462402344, "learning_rate": 2.452822696117314e-05, "loss": 0.4511723518371582, "step": 8571 }, { "epoch": 10.517791411042944, "grad_norm": 0.2455345094203949, "learning_rate": 2.4523208704442934e-05, "loss": 0.7201168537139893, "step": 8572 }, { "epoch": 10.519018404907975, "grad_norm": 0.23300273716449738, "learning_rate": 2.4518190466930833e-05, "loss": 0.8280701637268066, "step": 8573 }, { "epoch": 10.520245398773007, "grad_norm": 0.23801816999912262, "learning_rate": 2.4513172248839097e-05, "loss": 0.7802205085754395, "step": 8574 }, { "epoch": 10.521472392638037, "grad_norm": 0.23643141984939575, "learning_rate": 2.4508154050369998e-05, "loss": 0.8488987684249878, "step": 8575 }, { "epoch": 10.522699386503067, "grad_norm": 0.2516028881072998, "learning_rate": 2.450313587172581e-05, "loss": 0.6512961387634277, "step": 8576 }, { "epoch": 10.523926380368097, "grad_norm": 0.2620449662208557, "learning_rate": 2.4498117713108797e-05, "loss": 0.823072075843811, "step": 8577 }, { "epoch": 10.52515337423313, "grad_norm": 0.3477295935153961, "learning_rate": 2.4493099574721218e-05, "loss": 0.5274114608764648, "step": 8578 }, { "epoch": 10.52638036809816, "grad_norm": 0.3293758034706116, "learning_rate": 2.4488081456765354e-05, "loss": 0.7069448232650757, "step": 8579 }, { "epoch": 10.52760736196319, "grad_norm": 0.23485493659973145, "learning_rate": 2.4483063359443474e-05, "loss": 0.6753958463668823, "step": 8580 }, { "epoch": 10.52883435582822, "grad_norm": 0.3187940716743469, "learning_rate": 2.4478045282957832e-05, "loss": 0.579400897026062, "step": 8581 }, { "epoch": 10.530061349693252, "grad_norm": 0.25564780831336975, "learning_rate": 2.4473027227510696e-05, "loss": 0.7441174387931824, "step": 8582 }, { "epoch": 10.531288343558282, "grad_norm": 0.29761946201324463, "learning_rate": 2.446800919330433e-05, "loss": 0.48656606674194336, "step": 8583 }, { "epoch": 10.532515337423312, "grad_norm": 0.28768840432167053, "learning_rate": 2.446299118054099e-05, "loss": 0.6532493829727173, "step": 8584 }, { "epoch": 10.533742331288344, "grad_norm": 0.25736597180366516, "learning_rate": 2.4457973189422955e-05, "loss": 0.7846537828445435, "step": 8585 }, { "epoch": 10.534969325153375, "grad_norm": 0.2888180613517761, "learning_rate": 2.4452955220152467e-05, "loss": 0.7072136402130127, "step": 8586 }, { "epoch": 10.536196319018405, "grad_norm": 0.3021014332771301, "learning_rate": 2.44479372729318e-05, "loss": 0.36566996574401855, "step": 8587 }, { "epoch": 10.537423312883435, "grad_norm": 0.2690044939517975, "learning_rate": 2.4442919347963203e-05, "loss": 0.7070440649986267, "step": 8588 }, { "epoch": 10.538650306748465, "grad_norm": 0.294700562953949, "learning_rate": 2.4437901445448936e-05, "loss": 0.6789268851280212, "step": 8589 }, { "epoch": 10.539877300613497, "grad_norm": 0.30604982376098633, "learning_rate": 2.4432883565591266e-05, "loss": 0.740289568901062, "step": 8590 }, { "epoch": 10.541104294478528, "grad_norm": 0.2461385279893875, "learning_rate": 2.4427865708592434e-05, "loss": 0.7380899786949158, "step": 8591 }, { "epoch": 10.542331288343558, "grad_norm": 0.28940635919570923, "learning_rate": 2.442284787465471e-05, "loss": 0.7947530150413513, "step": 8592 }, { "epoch": 10.54355828220859, "grad_norm": 0.28172963857650757, "learning_rate": 2.4417830063980336e-05, "loss": 0.6809177398681641, "step": 8593 }, { "epoch": 10.54478527607362, "grad_norm": 0.29463133215904236, "learning_rate": 2.4412812276771574e-05, "loss": 0.8066480159759521, "step": 8594 }, { "epoch": 10.54601226993865, "grad_norm": 0.2591864764690399, "learning_rate": 2.4407794513230677e-05, "loss": 0.6307306289672852, "step": 8595 }, { "epoch": 10.54723926380368, "grad_norm": 0.22705237567424774, "learning_rate": 2.440277677355989e-05, "loss": 0.6333303451538086, "step": 8596 }, { "epoch": 10.548466257668712, "grad_norm": 0.28444406390190125, "learning_rate": 2.4397759057961475e-05, "loss": 0.6925194263458252, "step": 8597 }, { "epoch": 10.549693251533743, "grad_norm": 0.265340119600296, "learning_rate": 2.4392741366637666e-05, "loss": 0.6533501148223877, "step": 8598 }, { "epoch": 10.550920245398773, "grad_norm": 0.26838982105255127, "learning_rate": 2.4387723699790723e-05, "loss": 0.5718075037002563, "step": 8599 }, { "epoch": 10.552147239263803, "grad_norm": 0.2896531820297241, "learning_rate": 2.4382706057622885e-05, "loss": 0.6197938323020935, "step": 8600 }, { "epoch": 10.553374233128835, "grad_norm": 0.2772452235221863, "learning_rate": 2.4377688440336416e-05, "loss": 0.5524543523788452, "step": 8601 }, { "epoch": 10.554601226993865, "grad_norm": 0.26305559277534485, "learning_rate": 2.4372670848133547e-05, "loss": 0.6508936882019043, "step": 8602 }, { "epoch": 10.555828220858896, "grad_norm": 0.25436049699783325, "learning_rate": 2.4367653281216523e-05, "loss": 0.8374214172363281, "step": 8603 }, { "epoch": 10.557055214723926, "grad_norm": 0.3192073106765747, "learning_rate": 2.43626357397876e-05, "loss": 0.5494847297668457, "step": 8604 }, { "epoch": 10.558282208588958, "grad_norm": 0.2648421823978424, "learning_rate": 2.4357618224049008e-05, "loss": 0.6601401567459106, "step": 8605 }, { "epoch": 10.559509202453988, "grad_norm": 0.28033018112182617, "learning_rate": 2.4352600734203e-05, "loss": 0.773921549320221, "step": 8606 }, { "epoch": 10.560736196319018, "grad_norm": 0.2999536395072937, "learning_rate": 2.43475832704518e-05, "loss": 0.6472972631454468, "step": 8607 }, { "epoch": 10.561963190184048, "grad_norm": 0.35122039914131165, "learning_rate": 2.434256583299766e-05, "loss": 0.6294986009597778, "step": 8608 }, { "epoch": 10.56319018404908, "grad_norm": 1.553372859954834, "learning_rate": 2.4337548422042823e-05, "loss": 0.772462010383606, "step": 8609 }, { "epoch": 10.56441717791411, "grad_norm": 0.2744212746620178, "learning_rate": 2.4332531037789514e-05, "loss": 0.6185881495475769, "step": 8610 }, { "epoch": 10.565644171779141, "grad_norm": 0.2622712254524231, "learning_rate": 2.4327513680439974e-05, "loss": 0.6486813426017761, "step": 8611 }, { "epoch": 10.566871165644171, "grad_norm": 0.24659854173660278, "learning_rate": 2.4322496350196443e-05, "loss": 0.6971920728683472, "step": 8612 }, { "epoch": 10.568098159509203, "grad_norm": 0.31019261479377747, "learning_rate": 2.431747904726115e-05, "loss": 0.4113197326660156, "step": 8613 }, { "epoch": 10.569325153374233, "grad_norm": 0.4137357175350189, "learning_rate": 2.431246177183633e-05, "loss": 0.3874949812889099, "step": 8614 }, { "epoch": 10.570552147239264, "grad_norm": 0.2476353794336319, "learning_rate": 2.4307444524124215e-05, "loss": 0.6464533805847168, "step": 8615 }, { "epoch": 10.571779141104294, "grad_norm": 0.3879990875720978, "learning_rate": 2.4302427304327036e-05, "loss": 0.5496830344200134, "step": 8616 }, { "epoch": 10.573006134969326, "grad_norm": 0.3159087896347046, "learning_rate": 2.4297410112647026e-05, "loss": 0.4975379705429077, "step": 8617 }, { "epoch": 10.574233128834356, "grad_norm": 0.25509709119796753, "learning_rate": 2.4292392949286405e-05, "loss": 0.6001529693603516, "step": 8618 }, { "epoch": 10.575460122699386, "grad_norm": 0.23530776798725128, "learning_rate": 2.4287375814447412e-05, "loss": 0.6442315578460693, "step": 8619 }, { "epoch": 10.576687116564417, "grad_norm": 0.24689234793186188, "learning_rate": 2.4282358708332266e-05, "loss": 0.558246374130249, "step": 8620 }, { "epoch": 10.577914110429449, "grad_norm": 0.31429269909858704, "learning_rate": 2.427734163114319e-05, "loss": 0.7315741777420044, "step": 8621 }, { "epoch": 10.579141104294479, "grad_norm": 0.2808719873428345, "learning_rate": 2.4272324583082404e-05, "loss": 0.7223629951477051, "step": 8622 }, { "epoch": 10.580368098159509, "grad_norm": 0.25263434648513794, "learning_rate": 2.426730756435215e-05, "loss": 0.8779948949813843, "step": 8623 }, { "epoch": 10.58159509202454, "grad_norm": 0.2728976309299469, "learning_rate": 2.4262290575154635e-05, "loss": 0.7336211204528809, "step": 8624 }, { "epoch": 10.582822085889571, "grad_norm": 0.444823294878006, "learning_rate": 2.4257273615692083e-05, "loss": 0.7605115175247192, "step": 8625 }, { "epoch": 10.584049079754601, "grad_norm": 0.2484399676322937, "learning_rate": 2.4252256686166718e-05, "loss": 0.6737465858459473, "step": 8626 }, { "epoch": 10.585276073619632, "grad_norm": 0.33544331789016724, "learning_rate": 2.424723978678075e-05, "loss": 0.5356946587562561, "step": 8627 }, { "epoch": 10.586503067484662, "grad_norm": 0.32243943214416504, "learning_rate": 2.42422229177364e-05, "loss": 0.6229660511016846, "step": 8628 }, { "epoch": 10.587730061349694, "grad_norm": 0.2910864055156708, "learning_rate": 2.4237206079235882e-05, "loss": 0.5293774604797363, "step": 8629 }, { "epoch": 10.588957055214724, "grad_norm": 0.27459466457366943, "learning_rate": 2.4232189271481413e-05, "loss": 0.5768921375274658, "step": 8630 }, { "epoch": 10.590184049079754, "grad_norm": 0.3036739230155945, "learning_rate": 2.4227172494675204e-05, "loss": 0.709219217300415, "step": 8631 }, { "epoch": 10.591411042944785, "grad_norm": 0.3180431127548218, "learning_rate": 2.4222155749019465e-05, "loss": 0.5977102518081665, "step": 8632 }, { "epoch": 10.592638036809817, "grad_norm": 0.3476259410381317, "learning_rate": 2.4217139034716417e-05, "loss": 0.5518264770507812, "step": 8633 }, { "epoch": 10.593865030674847, "grad_norm": 0.3426552712917328, "learning_rate": 2.4212122351968253e-05, "loss": 0.798569917678833, "step": 8634 }, { "epoch": 10.595092024539877, "grad_norm": 0.38674601912498474, "learning_rate": 2.420710570097719e-05, "loss": 0.5681862831115723, "step": 8635 }, { "epoch": 10.596319018404907, "grad_norm": 0.23008063435554504, "learning_rate": 2.4202089081945443e-05, "loss": 0.7856913805007935, "step": 8636 }, { "epoch": 10.59754601226994, "grad_norm": 0.2868105471134186, "learning_rate": 2.4197072495075202e-05, "loss": 0.7376861572265625, "step": 8637 }, { "epoch": 10.59877300613497, "grad_norm": 0.2655849754810333, "learning_rate": 2.4192055940568683e-05, "loss": 0.6758420467376709, "step": 8638 }, { "epoch": 10.6, "grad_norm": 0.28314390778541565, "learning_rate": 2.418703941862808e-05, "loss": 0.6098980903625488, "step": 8639 }, { "epoch": 10.60122699386503, "grad_norm": 0.2672731280326843, "learning_rate": 2.41820229294556e-05, "loss": 0.760634183883667, "step": 8640 }, { "epoch": 10.602453987730062, "grad_norm": 0.25497788190841675, "learning_rate": 2.417700647325345e-05, "loss": 0.6023997068405151, "step": 8641 }, { "epoch": 10.603680981595092, "grad_norm": 0.2861447334289551, "learning_rate": 2.4171990050223813e-05, "loss": 0.6318211555480957, "step": 8642 }, { "epoch": 10.604907975460122, "grad_norm": 0.33066537976264954, "learning_rate": 2.4166973660568904e-05, "loss": 0.6950720548629761, "step": 8643 }, { "epoch": 10.606134969325154, "grad_norm": 0.3331977128982544, "learning_rate": 2.416195730449089e-05, "loss": 0.6382180452346802, "step": 8644 }, { "epoch": 10.607361963190185, "grad_norm": 0.28516143560409546, "learning_rate": 2.4156940982192008e-05, "loss": 0.6622705459594727, "step": 8645 }, { "epoch": 10.608588957055215, "grad_norm": 0.27264586091041565, "learning_rate": 2.4151924693874422e-05, "loss": 0.6333020925521851, "step": 8646 }, { "epoch": 10.609815950920245, "grad_norm": 0.28614115715026855, "learning_rate": 2.414690843974034e-05, "loss": 0.585776150226593, "step": 8647 }, { "epoch": 10.611042944785275, "grad_norm": 0.3008871376514435, "learning_rate": 2.414189221999194e-05, "loss": 0.5164549946784973, "step": 8648 }, { "epoch": 10.612269938650307, "grad_norm": 0.2863912880420685, "learning_rate": 2.4136876034831418e-05, "loss": 0.5338629484176636, "step": 8649 }, { "epoch": 10.613496932515337, "grad_norm": 0.26614224910736084, "learning_rate": 2.4131859884460965e-05, "loss": 0.3948901891708374, "step": 8650 }, { "epoch": 10.614723926380368, "grad_norm": 0.3133090138435364, "learning_rate": 2.4126843769082757e-05, "loss": 0.6893479228019714, "step": 8651 }, { "epoch": 10.6159509202454, "grad_norm": 0.2408757358789444, "learning_rate": 2.4121827688898995e-05, "loss": 0.7200133800506592, "step": 8652 }, { "epoch": 10.61717791411043, "grad_norm": 0.28991827368736267, "learning_rate": 2.4116811644111852e-05, "loss": 0.7303417921066284, "step": 8653 }, { "epoch": 10.61840490797546, "grad_norm": 0.2528984546661377, "learning_rate": 2.4111795634923505e-05, "loss": 0.6395998597145081, "step": 8654 }, { "epoch": 10.61963190184049, "grad_norm": 0.2653040289878845, "learning_rate": 2.4106779661536154e-05, "loss": 0.4767737090587616, "step": 8655 }, { "epoch": 10.62085889570552, "grad_norm": 0.2418249398469925, "learning_rate": 2.410176372415196e-05, "loss": 0.5291166305541992, "step": 8656 }, { "epoch": 10.622085889570553, "grad_norm": 0.29588520526885986, "learning_rate": 2.4096747822973106e-05, "loss": 0.6292276382446289, "step": 8657 }, { "epoch": 10.623312883435583, "grad_norm": 0.2812330722808838, "learning_rate": 2.409173195820178e-05, "loss": 0.6405599117279053, "step": 8658 }, { "epoch": 10.624539877300613, "grad_norm": 0.2638179659843445, "learning_rate": 2.4086716130040142e-05, "loss": 0.80460125207901, "step": 8659 }, { "epoch": 10.625766871165645, "grad_norm": 0.23012731969356537, "learning_rate": 2.4081700338690373e-05, "loss": 0.7005447149276733, "step": 8660 }, { "epoch": 10.626993865030675, "grad_norm": 0.24953654408454895, "learning_rate": 2.4076684584354646e-05, "loss": 0.5273942947387695, "step": 8661 }, { "epoch": 10.628220858895705, "grad_norm": 0.24052636325359344, "learning_rate": 2.4071668867235125e-05, "loss": 0.73674076795578, "step": 8662 }, { "epoch": 10.629447852760736, "grad_norm": 0.26381900906562805, "learning_rate": 2.4066653187533995e-05, "loss": 0.5173943042755127, "step": 8663 }, { "epoch": 10.630674846625768, "grad_norm": 0.30347496271133423, "learning_rate": 2.4061637545453403e-05, "loss": 0.5971142053604126, "step": 8664 }, { "epoch": 10.631901840490798, "grad_norm": 0.2828350067138672, "learning_rate": 2.405662194119553e-05, "loss": 0.8060944676399231, "step": 8665 }, { "epoch": 10.633128834355828, "grad_norm": 0.3481907844543457, "learning_rate": 2.405160637496252e-05, "loss": 0.5941370725631714, "step": 8666 }, { "epoch": 10.634355828220858, "grad_norm": 0.24524158239364624, "learning_rate": 2.4046590846956572e-05, "loss": 0.549896240234375, "step": 8667 }, { "epoch": 10.63558282208589, "grad_norm": 0.24673229455947876, "learning_rate": 2.404157535737982e-05, "loss": 0.3934140205383301, "step": 8668 }, { "epoch": 10.63680981595092, "grad_norm": 0.3605045676231384, "learning_rate": 2.403655990643444e-05, "loss": 0.5504812598228455, "step": 8669 }, { "epoch": 10.63803680981595, "grad_norm": 0.2351658046245575, "learning_rate": 2.4031544494322572e-05, "loss": 0.7209989428520203, "step": 8670 }, { "epoch": 10.639263803680981, "grad_norm": 0.33416661620140076, "learning_rate": 2.4026529121246387e-05, "loss": 0.6369730830192566, "step": 8671 }, { "epoch": 10.640490797546013, "grad_norm": 0.2780473530292511, "learning_rate": 2.402151378740804e-05, "loss": 0.7897509336471558, "step": 8672 }, { "epoch": 10.641717791411043, "grad_norm": 0.22753044962882996, "learning_rate": 2.4016498493009676e-05, "loss": 0.6181243658065796, "step": 8673 }, { "epoch": 10.642944785276073, "grad_norm": 0.27126288414001465, "learning_rate": 2.401148323825346e-05, "loss": 0.8577396869659424, "step": 8674 }, { "epoch": 10.644171779141104, "grad_norm": 0.32502639293670654, "learning_rate": 2.4006468023341528e-05, "loss": 0.588998556137085, "step": 8675 }, { "epoch": 10.645398773006136, "grad_norm": 0.32879459857940674, "learning_rate": 2.400145284847604e-05, "loss": 0.5594862699508667, "step": 8676 }, { "epoch": 10.646625766871166, "grad_norm": 0.2627117335796356, "learning_rate": 2.399643771385914e-05, "loss": 0.5621517896652222, "step": 8677 }, { "epoch": 10.647852760736196, "grad_norm": 0.3594359755516052, "learning_rate": 2.3991422619692972e-05, "loss": 0.4700095057487488, "step": 8678 }, { "epoch": 10.649079754601226, "grad_norm": 0.243622288107872, "learning_rate": 2.3986407566179682e-05, "loss": 0.8362303376197815, "step": 8679 }, { "epoch": 10.650306748466258, "grad_norm": 0.31808897852897644, "learning_rate": 2.3981392553521413e-05, "loss": 0.5775461196899414, "step": 8680 }, { "epoch": 10.651533742331289, "grad_norm": 0.2827227711677551, "learning_rate": 2.3976377581920302e-05, "loss": 0.7960809469223022, "step": 8681 }, { "epoch": 10.652760736196319, "grad_norm": 0.24147264659404755, "learning_rate": 2.3971362651578497e-05, "loss": 0.6920000910758972, "step": 8682 }, { "epoch": 10.653987730061349, "grad_norm": 0.27261486649513245, "learning_rate": 2.3966347762698125e-05, "loss": 0.4578436613082886, "step": 8683 }, { "epoch": 10.655214723926381, "grad_norm": 0.30013561248779297, "learning_rate": 2.396133291548133e-05, "loss": 0.7229864001274109, "step": 8684 }, { "epoch": 10.656441717791411, "grad_norm": 0.3068535029888153, "learning_rate": 2.3956318110130237e-05, "loss": 0.5808919072151184, "step": 8685 }, { "epoch": 10.657668711656441, "grad_norm": 0.28502383828163147, "learning_rate": 2.395130334684699e-05, "loss": 0.664986252784729, "step": 8686 }, { "epoch": 10.658895705521472, "grad_norm": 0.300489217042923, "learning_rate": 2.3946288625833714e-05, "loss": 0.4853936731815338, "step": 8687 }, { "epoch": 10.660122699386504, "grad_norm": 0.2632254660129547, "learning_rate": 2.3941273947292534e-05, "loss": 0.801907479763031, "step": 8688 }, { "epoch": 10.661349693251534, "grad_norm": 0.25779086351394653, "learning_rate": 2.3936259311425576e-05, "loss": 0.562423825263977, "step": 8689 }, { "epoch": 10.662576687116564, "grad_norm": 0.2396734207868576, "learning_rate": 2.3931244718434978e-05, "loss": 0.7369794845581055, "step": 8690 }, { "epoch": 10.663803680981594, "grad_norm": 0.28538885712623596, "learning_rate": 2.3926230168522863e-05, "loss": 0.6427059173583984, "step": 8691 }, { "epoch": 10.665030674846626, "grad_norm": 0.33397287130355835, "learning_rate": 2.392121566189134e-05, "loss": 0.45274117588996887, "step": 8692 }, { "epoch": 10.666257668711657, "grad_norm": 0.2661997973918915, "learning_rate": 2.3916201198742545e-05, "loss": 0.5660989284515381, "step": 8693 }, { "epoch": 10.667484662576687, "grad_norm": 0.3290131092071533, "learning_rate": 2.391118677927858e-05, "loss": 0.43919655680656433, "step": 8694 }, { "epoch": 10.668711656441717, "grad_norm": 0.2723711431026459, "learning_rate": 2.3906172403701573e-05, "loss": 0.6157090663909912, "step": 8695 }, { "epoch": 10.669938650306749, "grad_norm": 0.28445982933044434, "learning_rate": 2.390115807221364e-05, "loss": 0.6799089908599854, "step": 8696 }, { "epoch": 10.67116564417178, "grad_norm": 0.24570375680923462, "learning_rate": 2.3896143785016885e-05, "loss": 0.6139598488807678, "step": 8697 }, { "epoch": 10.67239263803681, "grad_norm": 0.30619746446609497, "learning_rate": 2.3891129542313436e-05, "loss": 0.7823454141616821, "step": 8698 }, { "epoch": 10.67361963190184, "grad_norm": 0.25127333402633667, "learning_rate": 2.388611534430538e-05, "loss": 0.69394850730896, "step": 8699 }, { "epoch": 10.674846625766872, "grad_norm": 0.2674449682235718, "learning_rate": 2.3881101191194842e-05, "loss": 0.4605584740638733, "step": 8700 }, { "epoch": 10.676073619631902, "grad_norm": 0.3533150255680084, "learning_rate": 2.387608708318393e-05, "loss": 0.5411710143089294, "step": 8701 }, { "epoch": 10.677300613496932, "grad_norm": 0.2821468412876129, "learning_rate": 2.3871073020474737e-05, "loss": 0.511638879776001, "step": 8702 }, { "epoch": 10.678527607361962, "grad_norm": 0.23729099333286285, "learning_rate": 2.3866059003269374e-05, "loss": 0.7811785340309143, "step": 8703 }, { "epoch": 10.679754601226994, "grad_norm": 0.2991519570350647, "learning_rate": 2.3861045031769933e-05, "loss": 0.6520867347717285, "step": 8704 }, { "epoch": 10.680981595092025, "grad_norm": 0.3456459045410156, "learning_rate": 2.385603110617852e-05, "loss": 0.5177605152130127, "step": 8705 }, { "epoch": 10.682208588957055, "grad_norm": 0.2943485975265503, "learning_rate": 2.3851017226697234e-05, "loss": 0.538166344165802, "step": 8706 }, { "epoch": 10.683435582822085, "grad_norm": 0.28651556372642517, "learning_rate": 2.3846003393528163e-05, "loss": 0.6337317228317261, "step": 8707 }, { "epoch": 10.684662576687117, "grad_norm": 0.24023549258708954, "learning_rate": 2.3840989606873402e-05, "loss": 0.7733289003372192, "step": 8708 }, { "epoch": 10.685889570552147, "grad_norm": 0.2502288520336151, "learning_rate": 2.3835975866935052e-05, "loss": 0.5362165570259094, "step": 8709 }, { "epoch": 10.687116564417177, "grad_norm": 0.27019447088241577, "learning_rate": 2.3830962173915185e-05, "loss": 0.8197352886199951, "step": 8710 }, { "epoch": 10.68834355828221, "grad_norm": 0.2923780679702759, "learning_rate": 2.3825948528015897e-05, "loss": 0.7214325666427612, "step": 8711 }, { "epoch": 10.68957055214724, "grad_norm": 0.29119759798049927, "learning_rate": 2.3820934929439282e-05, "loss": 0.8611778020858765, "step": 8712 }, { "epoch": 10.69079754601227, "grad_norm": 0.2799585461616516, "learning_rate": 2.381592137838742e-05, "loss": 0.7194989919662476, "step": 8713 }, { "epoch": 10.6920245398773, "grad_norm": 0.25168079137802124, "learning_rate": 2.3810907875062384e-05, "loss": 0.6048653721809387, "step": 8714 }, { "epoch": 10.69325153374233, "grad_norm": 0.2892126441001892, "learning_rate": 2.380589441966627e-05, "loss": 0.5584367513656616, "step": 8715 }, { "epoch": 10.694478527607362, "grad_norm": 0.3281988501548767, "learning_rate": 2.3800881012401134e-05, "loss": 0.6071350574493408, "step": 8716 }, { "epoch": 10.695705521472393, "grad_norm": 0.2646718919277191, "learning_rate": 2.379586765346907e-05, "loss": 0.6493820548057556, "step": 8717 }, { "epoch": 10.696932515337423, "grad_norm": 0.2507515549659729, "learning_rate": 2.379085434307215e-05, "loss": 0.7079512476921082, "step": 8718 }, { "epoch": 10.698159509202455, "grad_norm": 0.2865358293056488, "learning_rate": 2.3785841081412437e-05, "loss": 0.7463732957839966, "step": 8719 }, { "epoch": 10.699386503067485, "grad_norm": 0.33269327878952026, "learning_rate": 2.3780827868692015e-05, "loss": 0.3033893406391144, "step": 8720 }, { "epoch": 10.700613496932515, "grad_norm": 0.22639423608779907, "learning_rate": 2.3775814705112938e-05, "loss": 0.9110515117645264, "step": 8721 }, { "epoch": 10.701840490797546, "grad_norm": 0.3285481631755829, "learning_rate": 2.3770801590877277e-05, "loss": 0.7281612157821655, "step": 8722 }, { "epoch": 10.703067484662578, "grad_norm": 0.331730455160141, "learning_rate": 2.3765788526187105e-05, "loss": 0.7611326575279236, "step": 8723 }, { "epoch": 10.704294478527608, "grad_norm": 0.3393954634666443, "learning_rate": 2.3760775511244472e-05, "loss": 0.5540165305137634, "step": 8724 }, { "epoch": 10.705521472392638, "grad_norm": 0.32443538308143616, "learning_rate": 2.375576254625145e-05, "loss": 0.7207515239715576, "step": 8725 }, { "epoch": 10.706748466257668, "grad_norm": 0.2646844685077667, "learning_rate": 2.3750749631410082e-05, "loss": 0.7020849585533142, "step": 8726 }, { "epoch": 10.7079754601227, "grad_norm": 0.25769925117492676, "learning_rate": 2.3745736766922435e-05, "loss": 0.5707160234451294, "step": 8727 }, { "epoch": 10.70920245398773, "grad_norm": 0.37063902616500854, "learning_rate": 2.374072395299057e-05, "loss": 0.6668274402618408, "step": 8728 }, { "epoch": 10.71042944785276, "grad_norm": 0.28883031010627747, "learning_rate": 2.3735711189816523e-05, "loss": 0.584069550037384, "step": 8729 }, { "epoch": 10.71165644171779, "grad_norm": 0.2404700219631195, "learning_rate": 2.3730698477602357e-05, "loss": 0.7872483134269714, "step": 8730 }, { "epoch": 10.712883435582823, "grad_norm": 0.37568026781082153, "learning_rate": 2.372568581655011e-05, "loss": 0.6091381311416626, "step": 8731 }, { "epoch": 10.714110429447853, "grad_norm": 0.25320470333099365, "learning_rate": 2.3720673206861835e-05, "loss": 0.5410915613174438, "step": 8732 }, { "epoch": 10.715337423312883, "grad_norm": 0.24626658856868744, "learning_rate": 2.3715660648739568e-05, "loss": 0.8780654668807983, "step": 8733 }, { "epoch": 10.716564417177914, "grad_norm": 0.25801464915275574, "learning_rate": 2.3710648142385365e-05, "loss": 0.8381283283233643, "step": 8734 }, { "epoch": 10.717791411042946, "grad_norm": 0.24563060700893402, "learning_rate": 2.3705635688001256e-05, "loss": 0.5138614177703857, "step": 8735 }, { "epoch": 10.719018404907976, "grad_norm": 0.24459633231163025, "learning_rate": 2.370062328578928e-05, "loss": 0.6343666315078735, "step": 8736 }, { "epoch": 10.720245398773006, "grad_norm": 0.3337526321411133, "learning_rate": 2.3695610935951482e-05, "loss": 0.6810792684555054, "step": 8737 }, { "epoch": 10.721472392638036, "grad_norm": 0.3124784827232361, "learning_rate": 2.3690598638689877e-05, "loss": 0.6147270798683167, "step": 8738 }, { "epoch": 10.722699386503068, "grad_norm": 0.2940165400505066, "learning_rate": 2.3685586394206512e-05, "loss": 0.6041848659515381, "step": 8739 }, { "epoch": 10.723926380368098, "grad_norm": 0.34582433104515076, "learning_rate": 2.3680574202703408e-05, "loss": 0.5771229267120361, "step": 8740 }, { "epoch": 10.725153374233129, "grad_norm": 0.3082900643348694, "learning_rate": 2.367556206438259e-05, "loss": 0.5465226173400879, "step": 8741 }, { "epoch": 10.726380368098159, "grad_norm": 0.26440635323524475, "learning_rate": 2.3670549979446098e-05, "loss": 0.7182021737098694, "step": 8742 }, { "epoch": 10.72760736196319, "grad_norm": 0.2390621453523636, "learning_rate": 2.3665537948095938e-05, "loss": 0.6568495035171509, "step": 8743 }, { "epoch": 10.728834355828221, "grad_norm": 0.3590218424797058, "learning_rate": 2.3660525970534146e-05, "loss": 0.5347811579704285, "step": 8744 }, { "epoch": 10.730061349693251, "grad_norm": 0.2704881727695465, "learning_rate": 2.365551404696272e-05, "loss": 0.5679721832275391, "step": 8745 }, { "epoch": 10.731288343558282, "grad_norm": 0.30242908000946045, "learning_rate": 2.3650502177583692e-05, "loss": 0.6518030166625977, "step": 8746 }, { "epoch": 10.732515337423314, "grad_norm": 0.3095189034938812, "learning_rate": 2.3645490362599075e-05, "loss": 0.5948469638824463, "step": 8747 }, { "epoch": 10.733742331288344, "grad_norm": 0.27883976697921753, "learning_rate": 2.3640478602210876e-05, "loss": 0.5591486692428589, "step": 8748 }, { "epoch": 10.734969325153374, "grad_norm": 0.2546248733997345, "learning_rate": 2.363546689662111e-05, "loss": 0.5446109175682068, "step": 8749 }, { "epoch": 10.736196319018404, "grad_norm": 0.26257798075675964, "learning_rate": 2.3630455246031775e-05, "loss": 0.6189625859260559, "step": 8750 }, { "epoch": 10.737423312883436, "grad_norm": 0.23330286145210266, "learning_rate": 2.362544365064489e-05, "loss": 0.7324864864349365, "step": 8751 }, { "epoch": 10.738650306748466, "grad_norm": 0.3115295469760895, "learning_rate": 2.3620432110662448e-05, "loss": 0.4701656699180603, "step": 8752 }, { "epoch": 10.739877300613497, "grad_norm": 0.35164615511894226, "learning_rate": 2.3615420626286453e-05, "loss": 0.47589778900146484, "step": 8753 }, { "epoch": 10.741104294478527, "grad_norm": 0.31330975890159607, "learning_rate": 2.3610409197718908e-05, "loss": 0.6625005006790161, "step": 8754 }, { "epoch": 10.742331288343559, "grad_norm": 0.3231639862060547, "learning_rate": 2.360539782516179e-05, "loss": 0.6641806364059448, "step": 8755 }, { "epoch": 10.743558282208589, "grad_norm": 0.27905043959617615, "learning_rate": 2.360038650881712e-05, "loss": 0.5932775735855103, "step": 8756 }, { "epoch": 10.74478527607362, "grad_norm": 0.2418946921825409, "learning_rate": 2.3595375248886875e-05, "loss": 0.6428226828575134, "step": 8757 }, { "epoch": 10.74601226993865, "grad_norm": 0.23949003219604492, "learning_rate": 2.359036404557305e-05, "loss": 0.6924418807029724, "step": 8758 }, { "epoch": 10.747239263803682, "grad_norm": 0.23814524710178375, "learning_rate": 2.358535289907763e-05, "loss": 0.5781141519546509, "step": 8759 }, { "epoch": 10.748466257668712, "grad_norm": 0.2566531002521515, "learning_rate": 2.35803418096026e-05, "loss": 0.6420671939849854, "step": 8760 }, { "epoch": 10.749693251533742, "grad_norm": 0.26266512274742126, "learning_rate": 2.3575330777349945e-05, "loss": 0.8068056106567383, "step": 8761 }, { "epoch": 10.750920245398772, "grad_norm": 0.2957381308078766, "learning_rate": 2.3570319802521636e-05, "loss": 0.669196367263794, "step": 8762 }, { "epoch": 10.752147239263804, "grad_norm": 0.33868086338043213, "learning_rate": 2.356530888531966e-05, "loss": 0.6312408447265625, "step": 8763 }, { "epoch": 10.753374233128834, "grad_norm": 0.3039240539073944, "learning_rate": 2.3560298025945998e-05, "loss": 0.7040416598320007, "step": 8764 }, { "epoch": 10.754601226993865, "grad_norm": 0.2776903808116913, "learning_rate": 2.355528722460261e-05, "loss": 0.7686413526535034, "step": 8765 }, { "epoch": 10.755828220858895, "grad_norm": 0.3067583739757538, "learning_rate": 2.355027648149148e-05, "loss": 0.5049619674682617, "step": 8766 }, { "epoch": 10.757055214723927, "grad_norm": 0.2669585943222046, "learning_rate": 2.354526579681456e-05, "loss": 0.5616096258163452, "step": 8767 }, { "epoch": 10.758282208588957, "grad_norm": 0.270344078540802, "learning_rate": 2.3540255170773835e-05, "loss": 0.6131328344345093, "step": 8768 }, { "epoch": 10.759509202453987, "grad_norm": 0.2578679323196411, "learning_rate": 2.353524460357126e-05, "loss": 0.5714628100395203, "step": 8769 }, { "epoch": 10.76073619631902, "grad_norm": 0.2885792851448059, "learning_rate": 2.3530234095408792e-05, "loss": 0.6886682510375977, "step": 8770 }, { "epoch": 10.76196319018405, "grad_norm": 0.27310505509376526, "learning_rate": 2.3525223646488403e-05, "loss": 0.7450329661369324, "step": 8771 }, { "epoch": 10.76319018404908, "grad_norm": 0.2867407202720642, "learning_rate": 2.3520213257012037e-05, "loss": 0.6957541704177856, "step": 8772 }, { "epoch": 10.76441717791411, "grad_norm": 0.3071952164173126, "learning_rate": 2.3515202927181655e-05, "loss": 0.6307845115661621, "step": 8773 }, { "epoch": 10.76564417177914, "grad_norm": 0.3028204143047333, "learning_rate": 2.3510192657199212e-05, "loss": 0.6690443158149719, "step": 8774 }, { "epoch": 10.766871165644172, "grad_norm": 0.2870215177536011, "learning_rate": 2.3505182447266648e-05, "loss": 0.649038553237915, "step": 8775 }, { "epoch": 10.768098159509202, "grad_norm": 0.31912222504615784, "learning_rate": 2.3500172297585923e-05, "loss": 0.5797891020774841, "step": 8776 }, { "epoch": 10.769325153374233, "grad_norm": 0.2893097698688507, "learning_rate": 2.349516220835897e-05, "loss": 0.6879005432128906, "step": 8777 }, { "epoch": 10.770552147239265, "grad_norm": 0.27559056878089905, "learning_rate": 2.349015217978773e-05, "loss": 0.5351001620292664, "step": 8778 }, { "epoch": 10.771779141104295, "grad_norm": 0.3613663613796234, "learning_rate": 2.3485142212074154e-05, "loss": 0.5092939138412476, "step": 8779 }, { "epoch": 10.773006134969325, "grad_norm": 0.2725246250629425, "learning_rate": 2.3480132305420182e-05, "loss": 0.50029057264328, "step": 8780 }, { "epoch": 10.774233128834355, "grad_norm": 0.23023509979248047, "learning_rate": 2.347512246002774e-05, "loss": 0.7247806787490845, "step": 8781 }, { "epoch": 10.775460122699386, "grad_norm": 0.3187323808670044, "learning_rate": 2.3470112676098755e-05, "loss": 0.5719846487045288, "step": 8782 }, { "epoch": 10.776687116564418, "grad_norm": 0.35083404183387756, "learning_rate": 2.3465102953835175e-05, "loss": 0.607711672782898, "step": 8783 }, { "epoch": 10.777914110429448, "grad_norm": 0.3538508415222168, "learning_rate": 2.3460093293438908e-05, "loss": 0.39287233352661133, "step": 8784 }, { "epoch": 10.779141104294478, "grad_norm": 0.2652246952056885, "learning_rate": 2.3455083695111898e-05, "loss": 0.5466887354850769, "step": 8785 }, { "epoch": 10.78036809815951, "grad_norm": 0.2626037299633026, "learning_rate": 2.345007415905605e-05, "loss": 0.7378718852996826, "step": 8786 }, { "epoch": 10.78159509202454, "grad_norm": 0.28848621249198914, "learning_rate": 2.3445064685473293e-05, "loss": 0.7616268396377563, "step": 8787 }, { "epoch": 10.78282208588957, "grad_norm": 0.2871819734573364, "learning_rate": 2.344005527456555e-05, "loss": 0.7858622074127197, "step": 8788 }, { "epoch": 10.7840490797546, "grad_norm": 0.27282238006591797, "learning_rate": 2.3435045926534723e-05, "loss": 0.5172120332717896, "step": 8789 }, { "epoch": 10.785276073619633, "grad_norm": 0.36020520329475403, "learning_rate": 2.343003664158274e-05, "loss": 0.5145968198776245, "step": 8790 }, { "epoch": 10.786503067484663, "grad_norm": 0.2342919558286667, "learning_rate": 2.3425027419911495e-05, "loss": 0.5887184143066406, "step": 8791 }, { "epoch": 10.787730061349693, "grad_norm": 0.23491622507572174, "learning_rate": 2.34200182617229e-05, "loss": 0.8043111562728882, "step": 8792 }, { "epoch": 10.788957055214723, "grad_norm": 0.28779712319374084, "learning_rate": 2.341500916721887e-05, "loss": 0.7874301671981812, "step": 8793 }, { "epoch": 10.790184049079755, "grad_norm": 0.2984250783920288, "learning_rate": 2.3410000136601293e-05, "loss": 0.6664804220199585, "step": 8794 }, { "epoch": 10.791411042944786, "grad_norm": 0.253057062625885, "learning_rate": 2.3404991170072082e-05, "loss": 0.8108888864517212, "step": 8795 }, { "epoch": 10.792638036809816, "grad_norm": 0.26974934339523315, "learning_rate": 2.3399982267833123e-05, "loss": 0.8214101195335388, "step": 8796 }, { "epoch": 10.793865030674846, "grad_norm": 0.28091132640838623, "learning_rate": 2.3394973430086317e-05, "loss": 0.5864538550376892, "step": 8797 }, { "epoch": 10.795092024539878, "grad_norm": 0.2449904978275299, "learning_rate": 2.338996465703356e-05, "loss": 0.7639191150665283, "step": 8798 }, { "epoch": 10.796319018404908, "grad_norm": 0.2696495056152344, "learning_rate": 2.3384955948876727e-05, "loss": 0.6602015495300293, "step": 8799 }, { "epoch": 10.797546012269938, "grad_norm": 0.2803995907306671, "learning_rate": 2.337994730581771e-05, "loss": 0.5619229674339294, "step": 8800 }, { "epoch": 10.798773006134969, "grad_norm": 0.3473952114582062, "learning_rate": 2.3374938728058397e-05, "loss": 0.6547713279724121, "step": 8801 }, { "epoch": 10.8, "grad_norm": 0.29963523149490356, "learning_rate": 2.336993021580068e-05, "loss": 0.4725923538208008, "step": 8802 }, { "epoch": 10.801226993865031, "grad_norm": 0.2692795693874359, "learning_rate": 2.3364921769246423e-05, "loss": 0.790514349937439, "step": 8803 }, { "epoch": 10.802453987730061, "grad_norm": 0.2675974369049072, "learning_rate": 2.335991338859751e-05, "loss": 0.3835030794143677, "step": 8804 }, { "epoch": 10.803680981595091, "grad_norm": 0.32122254371643066, "learning_rate": 2.3354905074055802e-05, "loss": 0.7467729449272156, "step": 8805 }, { "epoch": 10.804907975460123, "grad_norm": 0.22298076748847961, "learning_rate": 2.3349896825823182e-05, "loss": 0.6456295251846313, "step": 8806 }, { "epoch": 10.806134969325154, "grad_norm": 0.27379900217056274, "learning_rate": 2.3344888644101517e-05, "loss": 0.6554144024848938, "step": 8807 }, { "epoch": 10.807361963190184, "grad_norm": 0.2949743866920471, "learning_rate": 2.3339880529092668e-05, "loss": 0.674950122833252, "step": 8808 }, { "epoch": 10.808588957055214, "grad_norm": 0.3229721486568451, "learning_rate": 2.3334872480998504e-05, "loss": 0.6412492394447327, "step": 8809 }, { "epoch": 10.809815950920246, "grad_norm": 0.31250983476638794, "learning_rate": 2.3329864500020876e-05, "loss": 0.46012255549430847, "step": 8810 }, { "epoch": 10.811042944785276, "grad_norm": 0.24895799160003662, "learning_rate": 2.3324856586361645e-05, "loss": 0.7373510599136353, "step": 8811 }, { "epoch": 10.812269938650306, "grad_norm": 0.2920231521129608, "learning_rate": 2.3319848740222673e-05, "loss": 0.635635256767273, "step": 8812 }, { "epoch": 10.813496932515337, "grad_norm": 0.3042750060558319, "learning_rate": 2.3314840961805802e-05, "loss": 0.4385776221752167, "step": 8813 }, { "epoch": 10.814723926380369, "grad_norm": 0.25562018156051636, "learning_rate": 2.330983325131289e-05, "loss": 0.6676755547523499, "step": 8814 }, { "epoch": 10.815950920245399, "grad_norm": 0.2418944239616394, "learning_rate": 2.3304825608945778e-05, "loss": 0.8023701310157776, "step": 8815 }, { "epoch": 10.81717791411043, "grad_norm": 0.22691361606121063, "learning_rate": 2.3299818034906305e-05, "loss": 0.6833028793334961, "step": 8816 }, { "epoch": 10.81840490797546, "grad_norm": 0.27881523966789246, "learning_rate": 2.329481052939633e-05, "loss": 0.7684999108314514, "step": 8817 }, { "epoch": 10.819631901840491, "grad_norm": 0.3212094306945801, "learning_rate": 2.328980309261767e-05, "loss": 0.6759149432182312, "step": 8818 }, { "epoch": 10.820858895705522, "grad_norm": 0.3419359028339386, "learning_rate": 2.328479572477217e-05, "loss": 0.6686480045318604, "step": 8819 }, { "epoch": 10.822085889570552, "grad_norm": 0.3699989318847656, "learning_rate": 2.3279788426061666e-05, "loss": 0.3457713723182678, "step": 8820 }, { "epoch": 10.823312883435582, "grad_norm": 0.35317835211753845, "learning_rate": 2.327478119668798e-05, "loss": 0.7832891941070557, "step": 8821 }, { "epoch": 10.824539877300614, "grad_norm": 0.3033638000488281, "learning_rate": 2.326977403685294e-05, "loss": 0.43216317892074585, "step": 8822 }, { "epoch": 10.825766871165644, "grad_norm": 0.2797967791557312, "learning_rate": 2.3264766946758376e-05, "loss": 0.6674821972846985, "step": 8823 }, { "epoch": 10.826993865030675, "grad_norm": 0.28268811106681824, "learning_rate": 2.3259759926606114e-05, "loss": 0.7352230548858643, "step": 8824 }, { "epoch": 10.828220858895705, "grad_norm": 0.24283739924430847, "learning_rate": 2.325475297659796e-05, "loss": 0.6694240570068359, "step": 8825 }, { "epoch": 10.829447852760737, "grad_norm": 0.30278706550598145, "learning_rate": 2.3249746096935746e-05, "loss": 0.721108615398407, "step": 8826 }, { "epoch": 10.830674846625767, "grad_norm": 0.2580500841140747, "learning_rate": 2.3244739287821265e-05, "loss": 0.5623528361320496, "step": 8827 }, { "epoch": 10.831901840490797, "grad_norm": 0.2705308198928833, "learning_rate": 2.323973254945634e-05, "loss": 0.5579929351806641, "step": 8828 }, { "epoch": 10.833128834355827, "grad_norm": 0.2743968963623047, "learning_rate": 2.3234725882042775e-05, "loss": 0.5904461145401001, "step": 8829 }, { "epoch": 10.83435582822086, "grad_norm": 0.3257399797439575, "learning_rate": 2.3229719285782375e-05, "loss": 0.6867035627365112, "step": 8830 }, { "epoch": 10.83558282208589, "grad_norm": 0.27699074149131775, "learning_rate": 2.3224712760876945e-05, "loss": 0.41702672839164734, "step": 8831 }, { "epoch": 10.83680981595092, "grad_norm": 0.26859134435653687, "learning_rate": 2.3219706307528278e-05, "loss": 0.6461188197135925, "step": 8832 }, { "epoch": 10.83803680981595, "grad_norm": 0.39260169863700867, "learning_rate": 2.321469992593817e-05, "loss": 0.5955422520637512, "step": 8833 }, { "epoch": 10.839263803680982, "grad_norm": 0.30438777804374695, "learning_rate": 2.320969361630842e-05, "loss": 0.5467377305030823, "step": 8834 }, { "epoch": 10.840490797546012, "grad_norm": 0.3016863763332367, "learning_rate": 2.3204687378840808e-05, "loss": 0.745356023311615, "step": 8835 }, { "epoch": 10.841717791411043, "grad_norm": 0.22765274345874786, "learning_rate": 2.319968121373714e-05, "loss": 0.6500633955001831, "step": 8836 }, { "epoch": 10.842944785276075, "grad_norm": 0.2760096490383148, "learning_rate": 2.3194675121199175e-05, "loss": 0.6445860862731934, "step": 8837 }, { "epoch": 10.844171779141105, "grad_norm": 0.2484193593263626, "learning_rate": 2.3189669101428707e-05, "loss": 0.627593994140625, "step": 8838 }, { "epoch": 10.845398773006135, "grad_norm": 0.2918231189250946, "learning_rate": 2.3184663154627522e-05, "loss": 0.5058544874191284, "step": 8839 }, { "epoch": 10.846625766871165, "grad_norm": 0.2895136773586273, "learning_rate": 2.3179657280997383e-05, "loss": 0.5004497766494751, "step": 8840 }, { "epoch": 10.847852760736195, "grad_norm": 0.2523070275783539, "learning_rate": 2.317465148074007e-05, "loss": 0.7539644241333008, "step": 8841 }, { "epoch": 10.849079754601227, "grad_norm": 0.32406750321388245, "learning_rate": 2.3169645754057345e-05, "loss": 0.6111222505569458, "step": 8842 }, { "epoch": 10.850306748466258, "grad_norm": 0.28257158398628235, "learning_rate": 2.3164640101150982e-05, "loss": 0.5874632596969604, "step": 8843 }, { "epoch": 10.851533742331288, "grad_norm": 0.2612561285495758, "learning_rate": 2.3159634522222735e-05, "loss": 0.7685480713844299, "step": 8844 }, { "epoch": 10.85276073619632, "grad_norm": 0.30479803681373596, "learning_rate": 2.3154629017474384e-05, "loss": 0.5347000956535339, "step": 8845 }, { "epoch": 10.85398773006135, "grad_norm": 0.3307361900806427, "learning_rate": 2.3149623587107666e-05, "loss": 0.7271069884300232, "step": 8846 }, { "epoch": 10.85521472392638, "grad_norm": 0.2792413830757141, "learning_rate": 2.3144618231324343e-05, "loss": 0.5654850006103516, "step": 8847 }, { "epoch": 10.85644171779141, "grad_norm": 0.2614702880382538, "learning_rate": 2.3139612950326176e-05, "loss": 0.607872486114502, "step": 8848 }, { "epoch": 10.857668711656443, "grad_norm": 0.27590465545654297, "learning_rate": 2.3134607744314894e-05, "loss": 0.477689266204834, "step": 8849 }, { "epoch": 10.858895705521473, "grad_norm": 0.2410404086112976, "learning_rate": 2.3129602613492264e-05, "loss": 0.6336785554885864, "step": 8850 }, { "epoch": 10.860122699386503, "grad_norm": 0.2980269491672516, "learning_rate": 2.3124597558060014e-05, "loss": 0.6993310451507568, "step": 8851 }, { "epoch": 10.861349693251533, "grad_norm": 0.318775475025177, "learning_rate": 2.3119592578219885e-05, "loss": 0.5458366870880127, "step": 8852 }, { "epoch": 10.862576687116565, "grad_norm": 0.24922975897789001, "learning_rate": 2.3114587674173623e-05, "loss": 0.7107993960380554, "step": 8853 }, { "epoch": 10.863803680981595, "grad_norm": 0.32734087109565735, "learning_rate": 2.3109582846122946e-05, "loss": 0.620644211769104, "step": 8854 }, { "epoch": 10.865030674846626, "grad_norm": 0.24564391374588013, "learning_rate": 2.31045780942696e-05, "loss": 0.8345627784729004, "step": 8855 }, { "epoch": 10.866257668711656, "grad_norm": 0.331796795129776, "learning_rate": 2.3099573418815303e-05, "loss": 0.6941771507263184, "step": 8856 }, { "epoch": 10.867484662576688, "grad_norm": 0.26235294342041016, "learning_rate": 2.3094568819961778e-05, "loss": 0.5984723567962646, "step": 8857 }, { "epoch": 10.868711656441718, "grad_norm": 0.26161107420921326, "learning_rate": 2.3089564297910752e-05, "loss": 0.7105071544647217, "step": 8858 }, { "epoch": 10.869938650306748, "grad_norm": 0.3107905685901642, "learning_rate": 2.308455985286394e-05, "loss": 0.6802712678909302, "step": 8859 }, { "epoch": 10.871165644171779, "grad_norm": 0.3013254404067993, "learning_rate": 2.3079555485023058e-05, "loss": 0.6330121755599976, "step": 8860 }, { "epoch": 10.87239263803681, "grad_norm": 0.2225983589887619, "learning_rate": 2.3074551194589818e-05, "loss": 0.8697654008865356, "step": 8861 }, { "epoch": 10.87361963190184, "grad_norm": 0.30100223422050476, "learning_rate": 2.306954698176592e-05, "loss": 0.4108645021915436, "step": 8862 }, { "epoch": 10.874846625766871, "grad_norm": 0.2402409017086029, "learning_rate": 2.3064542846753088e-05, "loss": 0.4321938455104828, "step": 8863 }, { "epoch": 10.876073619631901, "grad_norm": 0.35941219329833984, "learning_rate": 2.3059538789753007e-05, "loss": 0.4569013714790344, "step": 8864 }, { "epoch": 10.877300613496933, "grad_norm": 0.22367404401302338, "learning_rate": 2.3054534810967383e-05, "loss": 0.7032262086868286, "step": 8865 }, { "epoch": 10.878527607361963, "grad_norm": 0.2623329162597656, "learning_rate": 2.3049530910597907e-05, "loss": 0.6914904117584229, "step": 8866 }, { "epoch": 10.879754601226994, "grad_norm": 0.3289097845554352, "learning_rate": 2.304452708884629e-05, "loss": 0.7924574613571167, "step": 8867 }, { "epoch": 10.880981595092024, "grad_norm": 0.2729896306991577, "learning_rate": 2.30395233459142e-05, "loss": 0.3662257790565491, "step": 8868 }, { "epoch": 10.882208588957056, "grad_norm": 0.2633034586906433, "learning_rate": 2.3034519682003333e-05, "loss": 0.5259355306625366, "step": 8869 }, { "epoch": 10.883435582822086, "grad_norm": 0.27701935172080994, "learning_rate": 2.302951609731538e-05, "loss": 0.707506537437439, "step": 8870 }, { "epoch": 10.884662576687116, "grad_norm": 0.2647763192653656, "learning_rate": 2.3024512592052004e-05, "loss": 0.6944465637207031, "step": 8871 }, { "epoch": 10.885889570552147, "grad_norm": 0.3063681125640869, "learning_rate": 2.30195091664149e-05, "loss": 0.6559165716171265, "step": 8872 }, { "epoch": 10.887116564417179, "grad_norm": 0.3426179587841034, "learning_rate": 2.3014505820605724e-05, "loss": 0.6437474489212036, "step": 8873 }, { "epoch": 10.888343558282209, "grad_norm": 0.24639539420604706, "learning_rate": 2.3009502554826156e-05, "loss": 0.7980961799621582, "step": 8874 }, { "epoch": 10.889570552147239, "grad_norm": 0.26364660263061523, "learning_rate": 2.300449936927787e-05, "loss": 0.6963456869125366, "step": 8875 }, { "epoch": 10.89079754601227, "grad_norm": 0.2833741009235382, "learning_rate": 2.299949626416252e-05, "loss": 0.6395502090454102, "step": 8876 }, { "epoch": 10.892024539877301, "grad_norm": 0.31284335255622864, "learning_rate": 2.299449323968177e-05, "loss": 0.5730584859848022, "step": 8877 }, { "epoch": 10.893251533742331, "grad_norm": 0.2939932644367218, "learning_rate": 2.2989490296037276e-05, "loss": 0.6649860143661499, "step": 8878 }, { "epoch": 10.894478527607362, "grad_norm": 0.33483707904815674, "learning_rate": 2.2984487433430697e-05, "loss": 0.5212449431419373, "step": 8879 }, { "epoch": 10.895705521472392, "grad_norm": 0.23979386687278748, "learning_rate": 2.297948465206368e-05, "loss": 0.7129271030426025, "step": 8880 }, { "epoch": 10.896932515337424, "grad_norm": 0.24249041080474854, "learning_rate": 2.297448195213787e-05, "loss": 0.5196079611778259, "step": 8881 }, { "epoch": 10.898159509202454, "grad_norm": 0.3008674383163452, "learning_rate": 2.2969479333854923e-05, "loss": 0.7366626858711243, "step": 8882 }, { "epoch": 10.899386503067484, "grad_norm": 0.23336724936962128, "learning_rate": 2.296447679741647e-05, "loss": 0.7731987237930298, "step": 8883 }, { "epoch": 10.900613496932515, "grad_norm": 0.2862638235092163, "learning_rate": 2.2959474343024147e-05, "loss": 0.5218504667282104, "step": 8884 }, { "epoch": 10.901840490797547, "grad_norm": 0.29585516452789307, "learning_rate": 2.29544719708796e-05, "loss": 0.695121169090271, "step": 8885 }, { "epoch": 10.903067484662577, "grad_norm": 0.2373393028974533, "learning_rate": 2.2949469681184448e-05, "loss": 0.6666534543037415, "step": 8886 }, { "epoch": 10.904294478527607, "grad_norm": 0.29997846484184265, "learning_rate": 2.294446747414033e-05, "loss": 0.39821410179138184, "step": 8887 }, { "epoch": 10.905521472392637, "grad_norm": 0.292394757270813, "learning_rate": 2.2939465349948864e-05, "loss": 0.48153364658355713, "step": 8888 }, { "epoch": 10.90674846625767, "grad_norm": 0.31716135144233704, "learning_rate": 2.2934463308811662e-05, "loss": 0.6066288352012634, "step": 8889 }, { "epoch": 10.9079754601227, "grad_norm": 0.29162532091140747, "learning_rate": 2.2929461350930363e-05, "loss": 0.6055065393447876, "step": 8890 }, { "epoch": 10.90920245398773, "grad_norm": 0.2817787230014801, "learning_rate": 2.2924459476506575e-05, "loss": 0.4788804054260254, "step": 8891 }, { "epoch": 10.91042944785276, "grad_norm": 0.29544079303741455, "learning_rate": 2.2919457685741898e-05, "loss": 0.6647294759750366, "step": 8892 }, { "epoch": 10.911656441717792, "grad_norm": 0.2722644507884979, "learning_rate": 2.291445597883795e-05, "loss": 0.5138098001480103, "step": 8893 }, { "epoch": 10.912883435582822, "grad_norm": 0.2976483702659607, "learning_rate": 2.290945435599634e-05, "loss": 0.3858894109725952, "step": 8894 }, { "epoch": 10.914110429447852, "grad_norm": 0.21613486111164093, "learning_rate": 2.2904452817418655e-05, "loss": 0.7305463552474976, "step": 8895 }, { "epoch": 10.915337423312884, "grad_norm": 0.3030681312084198, "learning_rate": 2.2899451363306508e-05, "loss": 0.6230363249778748, "step": 8896 }, { "epoch": 10.916564417177915, "grad_norm": 0.22965556383132935, "learning_rate": 2.2894449993861477e-05, "loss": 0.5784994959831238, "step": 8897 }, { "epoch": 10.917791411042945, "grad_norm": 0.2979162931442261, "learning_rate": 2.2889448709285166e-05, "loss": 0.9253453016281128, "step": 8898 }, { "epoch": 10.919018404907975, "grad_norm": 0.3341808319091797, "learning_rate": 2.2884447509779162e-05, "loss": 0.8945635557174683, "step": 8899 }, { "epoch": 10.920245398773005, "grad_norm": 0.2444009929895401, "learning_rate": 2.2879446395545036e-05, "loss": 0.8143002986907959, "step": 8900 }, { "epoch": 10.921472392638037, "grad_norm": 0.32878273725509644, "learning_rate": 2.2874445366784388e-05, "loss": 0.7113072872161865, "step": 8901 }, { "epoch": 10.922699386503067, "grad_norm": 0.2792627215385437, "learning_rate": 2.2869444423698777e-05, "loss": 0.7448203563690186, "step": 8902 }, { "epoch": 10.923926380368098, "grad_norm": 0.3376804292201996, "learning_rate": 2.2864443566489786e-05, "loss": 0.42683497071266174, "step": 8903 }, { "epoch": 10.92515337423313, "grad_norm": 0.23239381611347198, "learning_rate": 2.285944279535899e-05, "loss": 0.7813015580177307, "step": 8904 }, { "epoch": 10.92638036809816, "grad_norm": 0.22227807343006134, "learning_rate": 2.285444211050794e-05, "loss": 0.7841237783432007, "step": 8905 }, { "epoch": 10.92760736196319, "grad_norm": 0.21935424208641052, "learning_rate": 2.2849441512138218e-05, "loss": 0.5673547983169556, "step": 8906 }, { "epoch": 10.92883435582822, "grad_norm": 0.2806585729122162, "learning_rate": 2.284444100045137e-05, "loss": 0.6014366745948792, "step": 8907 }, { "epoch": 10.93006134969325, "grad_norm": 0.23657919466495514, "learning_rate": 2.2839440575648954e-05, "loss": 0.7638148069381714, "step": 8908 }, { "epoch": 10.931288343558283, "grad_norm": 0.3552476465702057, "learning_rate": 2.2834440237932536e-05, "loss": 0.5562011003494263, "step": 8909 }, { "epoch": 10.932515337423313, "grad_norm": 0.29886990785598755, "learning_rate": 2.2829439987503646e-05, "loss": 0.5794436931610107, "step": 8910 }, { "epoch": 10.933742331288343, "grad_norm": 0.22622963786125183, "learning_rate": 2.282443982456384e-05, "loss": 0.6792076826095581, "step": 8911 }, { "epoch": 10.934969325153375, "grad_norm": 0.24684841930866241, "learning_rate": 2.281943974931466e-05, "loss": 0.753345251083374, "step": 8912 }, { "epoch": 10.936196319018405, "grad_norm": 0.23308825492858887, "learning_rate": 2.2814439761957652e-05, "loss": 0.6266224980354309, "step": 8913 }, { "epoch": 10.937423312883435, "grad_norm": 0.21080678701400757, "learning_rate": 2.280943986269434e-05, "loss": 0.6499351859092712, "step": 8914 }, { "epoch": 10.938650306748466, "grad_norm": 0.25488439202308655, "learning_rate": 2.280444005172626e-05, "loss": 0.699218213558197, "step": 8915 }, { "epoch": 10.939877300613498, "grad_norm": 0.24557209014892578, "learning_rate": 2.279944032925494e-05, "loss": 0.6775669455528259, "step": 8916 }, { "epoch": 10.941104294478528, "grad_norm": 0.5311304330825806, "learning_rate": 2.27944406954819e-05, "loss": 0.5138053894042969, "step": 8917 }, { "epoch": 10.942331288343558, "grad_norm": 0.21801212430000305, "learning_rate": 2.2789441150608673e-05, "loss": 0.7168072462081909, "step": 8918 }, { "epoch": 10.943558282208588, "grad_norm": 0.27474528551101685, "learning_rate": 2.278444169483676e-05, "loss": 0.7632660865783691, "step": 8919 }, { "epoch": 10.94478527607362, "grad_norm": 0.29167070984840393, "learning_rate": 2.2779442328367686e-05, "loss": 0.5659205913543701, "step": 8920 }, { "epoch": 10.94601226993865, "grad_norm": 0.3572607934474945, "learning_rate": 2.2774443051402964e-05, "loss": 0.5464391708374023, "step": 8921 }, { "epoch": 10.94723926380368, "grad_norm": 0.26407375931739807, "learning_rate": 2.276944386414409e-05, "loss": 0.6308246850967407, "step": 8922 }, { "epoch": 10.948466257668711, "grad_norm": 0.2245800495147705, "learning_rate": 2.2764444766792576e-05, "loss": 0.8830885887145996, "step": 8923 }, { "epoch": 10.949693251533743, "grad_norm": 0.2859495282173157, "learning_rate": 2.2759445759549915e-05, "loss": 0.7192715406417847, "step": 8924 }, { "epoch": 10.950920245398773, "grad_norm": 0.3937923312187195, "learning_rate": 2.27544468426176e-05, "loss": 0.5520912408828735, "step": 8925 }, { "epoch": 10.952147239263804, "grad_norm": 0.2508174180984497, "learning_rate": 2.2749448016197138e-05, "loss": 0.7735974192619324, "step": 8926 }, { "epoch": 10.953374233128834, "grad_norm": 0.27593350410461426, "learning_rate": 2.274444928049e-05, "loss": 0.6488360166549683, "step": 8927 }, { "epoch": 10.954601226993866, "grad_norm": 0.27487704157829285, "learning_rate": 2.2739450635697683e-05, "loss": 0.7813965082168579, "step": 8928 }, { "epoch": 10.955828220858896, "grad_norm": 0.23574867844581604, "learning_rate": 2.273445208202166e-05, "loss": 0.6058399081230164, "step": 8929 }, { "epoch": 10.957055214723926, "grad_norm": 0.22672800719738007, "learning_rate": 2.2729453619663413e-05, "loss": 0.5908094644546509, "step": 8930 }, { "epoch": 10.958282208588956, "grad_norm": 0.3359568417072296, "learning_rate": 2.2724455248824418e-05, "loss": 0.49689650535583496, "step": 8931 }, { "epoch": 10.959509202453988, "grad_norm": 0.2501412034034729, "learning_rate": 2.271945696970614e-05, "loss": 0.7057815790176392, "step": 8932 }, { "epoch": 10.960736196319019, "grad_norm": 0.26839470863342285, "learning_rate": 2.2714458782510036e-05, "loss": 0.700633704662323, "step": 8933 }, { "epoch": 10.961963190184049, "grad_norm": 0.39133286476135254, "learning_rate": 2.270946068743759e-05, "loss": 0.31997376680374146, "step": 8934 }, { "epoch": 10.963190184049079, "grad_norm": 0.32075658440589905, "learning_rate": 2.2704462684690252e-05, "loss": 0.5960777997970581, "step": 8935 }, { "epoch": 10.964417177914111, "grad_norm": 0.2501123547554016, "learning_rate": 2.2699464774469474e-05, "loss": 0.6425477266311646, "step": 8936 }, { "epoch": 10.965644171779141, "grad_norm": 0.3178403675556183, "learning_rate": 2.2694466956976714e-05, "loss": 0.6139909625053406, "step": 8937 }, { "epoch": 10.966871165644172, "grad_norm": 0.22518709301948547, "learning_rate": 2.2689469232413412e-05, "loss": 0.5925325155258179, "step": 8938 }, { "epoch": 10.968098159509202, "grad_norm": 0.30103880167007446, "learning_rate": 2.2684471600981013e-05, "loss": 0.7277971506118774, "step": 8939 }, { "epoch": 10.969325153374234, "grad_norm": 0.2561323940753937, "learning_rate": 2.267947406288097e-05, "loss": 0.6680004000663757, "step": 8940 }, { "epoch": 10.970552147239264, "grad_norm": 0.2712838053703308, "learning_rate": 2.2674476618314697e-05, "loss": 0.7970792055130005, "step": 8941 }, { "epoch": 10.971779141104294, "grad_norm": 0.2438758909702301, "learning_rate": 2.266947926748365e-05, "loss": 0.8270905017852783, "step": 8942 }, { "epoch": 10.973006134969324, "grad_norm": 0.2884966731071472, "learning_rate": 2.2664482010589237e-05, "loss": 0.6936507225036621, "step": 8943 }, { "epoch": 10.974233128834356, "grad_norm": 0.3347470462322235, "learning_rate": 2.2659484847832894e-05, "loss": 0.6079571843147278, "step": 8944 }, { "epoch": 10.975460122699387, "grad_norm": 0.3787249028682709, "learning_rate": 2.2654487779416048e-05, "loss": 0.40004420280456543, "step": 8945 }, { "epoch": 10.976687116564417, "grad_norm": 0.2697608768939972, "learning_rate": 2.2649490805540103e-05, "loss": 0.7176291942596436, "step": 8946 }, { "epoch": 10.977914110429447, "grad_norm": 0.27842506766319275, "learning_rate": 2.2644493926406484e-05, "loss": 0.6400555372238159, "step": 8947 }, { "epoch": 10.979141104294479, "grad_norm": 0.2957735061645508, "learning_rate": 2.2639497142216595e-05, "loss": 0.6541576385498047, "step": 8948 }, { "epoch": 10.98036809815951, "grad_norm": 0.26251184940338135, "learning_rate": 2.263450045317184e-05, "loss": 0.6328268051147461, "step": 8949 }, { "epoch": 10.98159509202454, "grad_norm": 0.2336115688085556, "learning_rate": 2.2629503859473634e-05, "loss": 0.6228305101394653, "step": 8950 }, { "epoch": 10.98282208588957, "grad_norm": 0.3174164593219757, "learning_rate": 2.262450736132336e-05, "loss": 0.688563883304596, "step": 8951 }, { "epoch": 10.984049079754602, "grad_norm": 0.27646490931510925, "learning_rate": 2.261951095892243e-05, "loss": 0.7476619482040405, "step": 8952 }, { "epoch": 10.985276073619632, "grad_norm": 0.2916063666343689, "learning_rate": 2.2614514652472207e-05, "loss": 0.7692729234695435, "step": 8953 }, { "epoch": 10.986503067484662, "grad_norm": 0.22469112277030945, "learning_rate": 2.2609518442174102e-05, "loss": 0.7066100835800171, "step": 8954 }, { "epoch": 10.987730061349692, "grad_norm": 0.3403623700141907, "learning_rate": 2.2604522328229486e-05, "loss": 0.5756995677947998, "step": 8955 }, { "epoch": 10.988957055214724, "grad_norm": 0.3829893171787262, "learning_rate": 2.259952631083975e-05, "loss": 0.3877810835838318, "step": 8956 }, { "epoch": 10.990184049079755, "grad_norm": 0.29904094338417053, "learning_rate": 2.259453039020626e-05, "loss": 0.6853751540184021, "step": 8957 }, { "epoch": 10.991411042944785, "grad_norm": 0.2612892687320709, "learning_rate": 2.2589534566530385e-05, "loss": 0.6687577962875366, "step": 8958 }, { "epoch": 10.992638036809815, "grad_norm": 0.32674646377563477, "learning_rate": 2.2584538840013505e-05, "loss": 0.6674345135688782, "step": 8959 }, { "epoch": 10.993865030674847, "grad_norm": 0.24751299619674683, "learning_rate": 2.257954321085697e-05, "loss": 0.7305450439453125, "step": 8960 }, { "epoch": 10.995092024539877, "grad_norm": 0.28948092460632324, "learning_rate": 2.2574547679262152e-05, "loss": 0.8025891780853271, "step": 8961 }, { "epoch": 10.996319018404908, "grad_norm": 0.287122517824173, "learning_rate": 2.2569552245430392e-05, "loss": 0.6145433783531189, "step": 8962 }, { "epoch": 10.99754601226994, "grad_norm": 0.28907710313796997, "learning_rate": 2.2564556909563048e-05, "loss": 0.6465986967086792, "step": 8963 }, { "epoch": 10.99877300613497, "grad_norm": 0.2373785376548767, "learning_rate": 2.2559561671861475e-05, "loss": 0.6321878433227539, "step": 8964 }, { "epoch": 11.0, "grad_norm": 0.4077962040901184, "learning_rate": 2.2554566532527005e-05, "loss": 0.4786819815635681, "step": 8965 }, { "epoch": 11.00122699386503, "grad_norm": 0.24739064276218414, "learning_rate": 2.2549571491760986e-05, "loss": 0.6786208748817444, "step": 8966 }, { "epoch": 11.002453987730062, "grad_norm": 0.2697179913520813, "learning_rate": 2.2544576549764754e-05, "loss": 0.49530652165412903, "step": 8967 }, { "epoch": 11.003680981595092, "grad_norm": 0.25159814953804016, "learning_rate": 2.253958170673963e-05, "loss": 0.6685546636581421, "step": 8968 }, { "epoch": 11.004907975460123, "grad_norm": 0.21253052353858948, "learning_rate": 2.2534586962886964e-05, "loss": 0.8939394950866699, "step": 8969 }, { "epoch": 11.006134969325153, "grad_norm": 0.23666970431804657, "learning_rate": 2.2529592318408054e-05, "loss": 0.8133875131607056, "step": 8970 }, { "epoch": 11.007361963190185, "grad_norm": 0.24050858616828918, "learning_rate": 2.2524597773504233e-05, "loss": 0.5689035654067993, "step": 8971 }, { "epoch": 11.008588957055215, "grad_norm": 0.34024593234062195, "learning_rate": 2.2519603328376823e-05, "loss": 0.7106407880783081, "step": 8972 }, { "epoch": 11.009815950920245, "grad_norm": 0.3172786831855774, "learning_rate": 2.251460898322712e-05, "loss": 0.5303924083709717, "step": 8973 }, { "epoch": 11.011042944785276, "grad_norm": 0.38917404413223267, "learning_rate": 2.2509614738256448e-05, "loss": 0.6567904949188232, "step": 8974 }, { "epoch": 11.012269938650308, "grad_norm": 0.2596832811832428, "learning_rate": 2.2504620593666097e-05, "loss": 0.616798996925354, "step": 8975 }, { "epoch": 11.013496932515338, "grad_norm": 0.29007866978645325, "learning_rate": 2.2499626549657374e-05, "loss": 0.7219976186752319, "step": 8976 }, { "epoch": 11.014723926380368, "grad_norm": 0.2741481363773346, "learning_rate": 2.2494632606431576e-05, "loss": 0.7066389918327332, "step": 8977 }, { "epoch": 11.015950920245398, "grad_norm": 0.282890647649765, "learning_rate": 2.2489638764189984e-05, "loss": 0.6011643409729004, "step": 8978 }, { "epoch": 11.01717791411043, "grad_norm": 0.28313449025154114, "learning_rate": 2.2484645023133897e-05, "loss": 0.5473277568817139, "step": 8979 }, { "epoch": 11.01840490797546, "grad_norm": 0.208548903465271, "learning_rate": 2.24796513834646e-05, "loss": 0.5962758660316467, "step": 8980 }, { "epoch": 11.01963190184049, "grad_norm": 0.2786494791507721, "learning_rate": 2.2474657845383367e-05, "loss": 0.7434950470924377, "step": 8981 }, { "epoch": 11.020858895705521, "grad_norm": 0.27553048729896545, "learning_rate": 2.246966440909147e-05, "loss": 0.509188175201416, "step": 8982 }, { "epoch": 11.022085889570553, "grad_norm": 0.2549065351486206, "learning_rate": 2.246467107479019e-05, "loss": 0.6896716356277466, "step": 8983 }, { "epoch": 11.023312883435583, "grad_norm": 0.3160596787929535, "learning_rate": 2.2459677842680782e-05, "loss": 0.6175902485847473, "step": 8984 }, { "epoch": 11.024539877300613, "grad_norm": 0.363022118806839, "learning_rate": 2.2454684712964515e-05, "loss": 0.40946096181869507, "step": 8985 }, { "epoch": 11.025766871165644, "grad_norm": 0.2881472408771515, "learning_rate": 2.2449691685842657e-05, "loss": 0.5265710353851318, "step": 8986 }, { "epoch": 11.026993865030676, "grad_norm": 0.24598069489002228, "learning_rate": 2.2444698761516443e-05, "loss": 0.651706874370575, "step": 8987 }, { "epoch": 11.028220858895706, "grad_norm": 0.26316580176353455, "learning_rate": 2.243970594018714e-05, "loss": 0.7247450351715088, "step": 8988 }, { "epoch": 11.029447852760736, "grad_norm": 0.2633914351463318, "learning_rate": 2.2434713222055985e-05, "loss": 0.7014328241348267, "step": 8989 }, { "epoch": 11.030674846625766, "grad_norm": 0.2353706657886505, "learning_rate": 2.2429720607324224e-05, "loss": 0.731281042098999, "step": 8990 }, { "epoch": 11.031901840490798, "grad_norm": 0.25838813185691833, "learning_rate": 2.2424728096193096e-05, "loss": 0.5541221499443054, "step": 8991 }, { "epoch": 11.033128834355828, "grad_norm": 0.2916668653488159, "learning_rate": 2.241973568886383e-05, "loss": 0.591254711151123, "step": 8992 }, { "epoch": 11.034355828220859, "grad_norm": 0.2144465297460556, "learning_rate": 2.2414743385537666e-05, "loss": 0.7602359056472778, "step": 8993 }, { "epoch": 11.035582822085889, "grad_norm": 0.23952727019786835, "learning_rate": 2.2409751186415817e-05, "loss": 0.5082275867462158, "step": 8994 }, { "epoch": 11.036809815950921, "grad_norm": 0.2559584975242615, "learning_rate": 2.240475909169951e-05, "loss": 0.7603647708892822, "step": 8995 }, { "epoch": 11.038036809815951, "grad_norm": 0.26047688722610474, "learning_rate": 2.2399767101589965e-05, "loss": 0.6335472464561462, "step": 8996 }, { "epoch": 11.039263803680981, "grad_norm": 0.28516584634780884, "learning_rate": 2.2394775216288392e-05, "loss": 0.5169500112533569, "step": 8997 }, { "epoch": 11.040490797546012, "grad_norm": 0.24596667289733887, "learning_rate": 2.2389783435996e-05, "loss": 0.715294361114502, "step": 8998 }, { "epoch": 11.041717791411044, "grad_norm": 0.2639316916465759, "learning_rate": 2.238479176091399e-05, "loss": 0.8461528420448303, "step": 8999 }, { "epoch": 11.042944785276074, "grad_norm": 0.24096326529979706, "learning_rate": 2.2379800191243562e-05, "loss": 0.5278549790382385, "step": 9000 }, { "epoch": 11.044171779141104, "grad_norm": 0.22687587141990662, "learning_rate": 2.2374808727185917e-05, "loss": 0.9096822738647461, "step": 9001 }, { "epoch": 11.045398773006134, "grad_norm": 0.26598405838012695, "learning_rate": 2.236981736894225e-05, "loss": 0.6335574388504028, "step": 9002 }, { "epoch": 11.046625766871166, "grad_norm": 0.26535308361053467, "learning_rate": 2.2364826116713738e-05, "loss": 0.6081584692001343, "step": 9003 }, { "epoch": 11.047852760736196, "grad_norm": 0.26388394832611084, "learning_rate": 2.2359834970701572e-05, "loss": 0.5950449109077454, "step": 9004 }, { "epoch": 11.049079754601227, "grad_norm": 0.23620183765888214, "learning_rate": 2.2354843931106933e-05, "loss": 0.5334432125091553, "step": 9005 }, { "epoch": 11.050306748466257, "grad_norm": 0.22572676837444305, "learning_rate": 2.2349852998130984e-05, "loss": 0.526008665561676, "step": 9006 }, { "epoch": 11.051533742331289, "grad_norm": 0.3174230456352234, "learning_rate": 2.234486217197491e-05, "loss": 0.5703580379486084, "step": 9007 }, { "epoch": 11.05276073619632, "grad_norm": 0.32985106110572815, "learning_rate": 2.2339871452839863e-05, "loss": 0.4889127016067505, "step": 9008 }, { "epoch": 11.05398773006135, "grad_norm": 0.23866823315620422, "learning_rate": 2.2334880840927013e-05, "loss": 0.6286141872406006, "step": 9009 }, { "epoch": 11.05521472392638, "grad_norm": 0.3079235553741455, "learning_rate": 2.2329890336437516e-05, "loss": 0.5681132674217224, "step": 9010 }, { "epoch": 11.056441717791412, "grad_norm": 0.2781181335449219, "learning_rate": 2.232489993957252e-05, "loss": 0.6063030958175659, "step": 9011 }, { "epoch": 11.057668711656442, "grad_norm": 0.2903244197368622, "learning_rate": 2.2319909650533187e-05, "loss": 0.6694138646125793, "step": 9012 }, { "epoch": 11.058895705521472, "grad_norm": 0.2644781470298767, "learning_rate": 2.2314919469520643e-05, "loss": 0.5192304849624634, "step": 9013 }, { "epoch": 11.060122699386502, "grad_norm": 0.24911680817604065, "learning_rate": 2.2309929396736037e-05, "loss": 0.665267825126648, "step": 9014 }, { "epoch": 11.061349693251534, "grad_norm": 0.2623513340950012, "learning_rate": 2.230493943238051e-05, "loss": 0.730292022228241, "step": 9015 }, { "epoch": 11.062576687116565, "grad_norm": 0.28849276900291443, "learning_rate": 2.2299949576655183e-05, "loss": 0.5761388540267944, "step": 9016 }, { "epoch": 11.063803680981595, "grad_norm": 0.28321948647499084, "learning_rate": 2.229495982976119e-05, "loss": 0.6564351916313171, "step": 9017 }, { "epoch": 11.065030674846625, "grad_norm": 0.2339242547750473, "learning_rate": 2.2289970191899654e-05, "loss": 0.8024856448173523, "step": 9018 }, { "epoch": 11.066257668711657, "grad_norm": 0.2486351579427719, "learning_rate": 2.2284980663271683e-05, "loss": 0.6701191067695618, "step": 9019 }, { "epoch": 11.067484662576687, "grad_norm": 0.20296958088874817, "learning_rate": 2.2279991244078407e-05, "loss": 0.739526093006134, "step": 9020 }, { "epoch": 11.068711656441717, "grad_norm": 0.2994755506515503, "learning_rate": 2.2275001934520923e-05, "loss": 0.5413514375686646, "step": 9021 }, { "epoch": 11.069938650306748, "grad_norm": 0.2585681080818176, "learning_rate": 2.2270012734800328e-05, "loss": 0.471454381942749, "step": 9022 }, { "epoch": 11.07116564417178, "grad_norm": 0.21934691071510315, "learning_rate": 2.2265023645117742e-05, "loss": 0.7738031148910522, "step": 9023 }, { "epoch": 11.07239263803681, "grad_norm": 0.2686108648777008, "learning_rate": 2.2260034665674255e-05, "loss": 0.7628161907196045, "step": 9024 }, { "epoch": 11.07361963190184, "grad_norm": 0.2284572422504425, "learning_rate": 2.2255045796670956e-05, "loss": 0.5559903383255005, "step": 9025 }, { "epoch": 11.07484662576687, "grad_norm": 0.2671736478805542, "learning_rate": 2.225005703830893e-05, "loss": 0.5248673558235168, "step": 9026 }, { "epoch": 11.076073619631902, "grad_norm": 0.28762492537498474, "learning_rate": 2.2245068390789266e-05, "loss": 0.6998753547668457, "step": 9027 }, { "epoch": 11.077300613496933, "grad_norm": 0.2962742745876312, "learning_rate": 2.2240079854313033e-05, "loss": 0.5369511842727661, "step": 9028 }, { "epoch": 11.078527607361963, "grad_norm": 0.23928461968898773, "learning_rate": 2.2235091429081316e-05, "loss": 0.6898243427276611, "step": 9029 }, { "epoch": 11.079754601226995, "grad_norm": 0.29541516304016113, "learning_rate": 2.2230103115295168e-05, "loss": 0.6693230271339417, "step": 9030 }, { "epoch": 11.080981595092025, "grad_norm": 0.2541494071483612, "learning_rate": 2.2225114913155666e-05, "loss": 0.7899926900863647, "step": 9031 }, { "epoch": 11.082208588957055, "grad_norm": 0.279952734708786, "learning_rate": 2.2220126822863874e-05, "loss": 0.5759648084640503, "step": 9032 }, { "epoch": 11.083435582822085, "grad_norm": 0.24235257506370544, "learning_rate": 2.2215138844620835e-05, "loss": 0.7156075239181519, "step": 9033 }, { "epoch": 11.084662576687117, "grad_norm": 0.27173691987991333, "learning_rate": 2.2210150978627613e-05, "loss": 0.5904428958892822, "step": 9034 }, { "epoch": 11.085889570552148, "grad_norm": 0.22780419886112213, "learning_rate": 2.220516322508524e-05, "loss": 0.7288241386413574, "step": 9035 }, { "epoch": 11.087116564417178, "grad_norm": 0.3075476288795471, "learning_rate": 2.220017558419477e-05, "loss": 0.4186530113220215, "step": 9036 }, { "epoch": 11.088343558282208, "grad_norm": 0.2737850248813629, "learning_rate": 2.219518805615724e-05, "loss": 0.5116289854049683, "step": 9037 }, { "epoch": 11.08957055214724, "grad_norm": 0.2779368460178375, "learning_rate": 2.2190200641173675e-05, "loss": 0.5177784562110901, "step": 9038 }, { "epoch": 11.09079754601227, "grad_norm": 0.26142704486846924, "learning_rate": 2.2185213339445114e-05, "loss": 0.7388403415679932, "step": 9039 }, { "epoch": 11.0920245398773, "grad_norm": 0.23522546887397766, "learning_rate": 2.218022615117257e-05, "loss": 0.6149908304214478, "step": 9040 }, { "epoch": 11.09325153374233, "grad_norm": 0.268452912569046, "learning_rate": 2.2175239076557068e-05, "loss": 0.7351124882698059, "step": 9041 }, { "epoch": 11.094478527607363, "grad_norm": 0.2706090807914734, "learning_rate": 2.2170252115799625e-05, "loss": 0.7539585828781128, "step": 9042 }, { "epoch": 11.095705521472393, "grad_norm": 0.26611676812171936, "learning_rate": 2.2165265269101246e-05, "loss": 0.7532260417938232, "step": 9043 }, { "epoch": 11.096932515337423, "grad_norm": 0.27261924743652344, "learning_rate": 2.2160278536662933e-05, "loss": 0.751930296421051, "step": 9044 }, { "epoch": 11.098159509202453, "grad_norm": 0.24125255644321442, "learning_rate": 2.2155291918685697e-05, "loss": 0.5462782382965088, "step": 9045 }, { "epoch": 11.099386503067485, "grad_norm": 0.23418042063713074, "learning_rate": 2.2150305415370538e-05, "loss": 0.7557603716850281, "step": 9046 }, { "epoch": 11.100613496932516, "grad_norm": 0.270143985748291, "learning_rate": 2.214531902691843e-05, "loss": 0.5354474782943726, "step": 9047 }, { "epoch": 11.101840490797546, "grad_norm": 0.2531948387622833, "learning_rate": 2.2140332753530383e-05, "loss": 0.5726578831672668, "step": 9048 }, { "epoch": 11.103067484662576, "grad_norm": 0.24942079186439514, "learning_rate": 2.2135346595407357e-05, "loss": 0.7891460657119751, "step": 9049 }, { "epoch": 11.104294478527608, "grad_norm": 0.2303026020526886, "learning_rate": 2.213036055275034e-05, "loss": 0.7603711485862732, "step": 9050 }, { "epoch": 11.105521472392638, "grad_norm": 0.25019821524620056, "learning_rate": 2.2125374625760313e-05, "loss": 0.6276224255561829, "step": 9051 }, { "epoch": 11.106748466257669, "grad_norm": 0.22902213037014008, "learning_rate": 2.2120388814638226e-05, "loss": 0.6612306833267212, "step": 9052 }, { "epoch": 11.107975460122699, "grad_norm": 0.2428750842809677, "learning_rate": 2.211540311958506e-05, "loss": 0.657555878162384, "step": 9053 }, { "epoch": 11.10920245398773, "grad_norm": 0.22683949768543243, "learning_rate": 2.2110417540801762e-05, "loss": 0.6985154151916504, "step": 9054 }, { "epoch": 11.110429447852761, "grad_norm": 0.2743312418460846, "learning_rate": 2.2105432078489293e-05, "loss": 0.5016511678695679, "step": 9055 }, { "epoch": 11.111656441717791, "grad_norm": 0.27539974451065063, "learning_rate": 2.2100446732848605e-05, "loss": 0.6855504512786865, "step": 9056 }, { "epoch": 11.112883435582821, "grad_norm": 0.2523886561393738, "learning_rate": 2.2095461504080632e-05, "loss": 0.7388455867767334, "step": 9057 }, { "epoch": 11.114110429447853, "grad_norm": 0.2807312607765198, "learning_rate": 2.209047639238633e-05, "loss": 0.6985169649124146, "step": 9058 }, { "epoch": 11.115337423312884, "grad_norm": 0.23134745657444, "learning_rate": 2.2085491397966622e-05, "loss": 0.769493818283081, "step": 9059 }, { "epoch": 11.116564417177914, "grad_norm": 0.23091621696949005, "learning_rate": 2.208050652102244e-05, "loss": 0.7548106908798218, "step": 9060 }, { "epoch": 11.117791411042944, "grad_norm": 0.26141035556793213, "learning_rate": 2.2075521761754724e-05, "loss": 0.5285958051681519, "step": 9061 }, { "epoch": 11.119018404907976, "grad_norm": 0.3127463161945343, "learning_rate": 2.2070537120364374e-05, "loss": 0.6605629920959473, "step": 9062 }, { "epoch": 11.120245398773006, "grad_norm": 0.25477492809295654, "learning_rate": 2.2065552597052325e-05, "loss": 0.5680862665176392, "step": 9063 }, { "epoch": 11.121472392638037, "grad_norm": 0.2646164894104004, "learning_rate": 2.2060568192019475e-05, "loss": 0.6526812314987183, "step": 9064 }, { "epoch": 11.122699386503067, "grad_norm": 0.3246406018733978, "learning_rate": 2.205558390546674e-05, "loss": 0.5417835712432861, "step": 9065 }, { "epoch": 11.123926380368099, "grad_norm": 0.24800920486450195, "learning_rate": 2.2050599737595022e-05, "loss": 0.7818253040313721, "step": 9066 }, { "epoch": 11.125153374233129, "grad_norm": 0.2553947865962982, "learning_rate": 2.204561568860521e-05, "loss": 0.585527241230011, "step": 9067 }, { "epoch": 11.12638036809816, "grad_norm": 0.30109545588493347, "learning_rate": 2.2040631758698207e-05, "loss": 0.6100291013717651, "step": 9068 }, { "epoch": 11.12760736196319, "grad_norm": 0.30711203813552856, "learning_rate": 2.2035647948074896e-05, "loss": 0.4187234044075012, "step": 9069 }, { "epoch": 11.128834355828221, "grad_norm": 0.2682923972606659, "learning_rate": 2.203066425693617e-05, "loss": 0.6716396808624268, "step": 9070 }, { "epoch": 11.130061349693252, "grad_norm": 0.27149564027786255, "learning_rate": 2.2025680685482893e-05, "loss": 0.5951095819473267, "step": 9071 }, { "epoch": 11.131288343558282, "grad_norm": 0.27031081914901733, "learning_rate": 2.2020697233915944e-05, "loss": 0.8261202573776245, "step": 9072 }, { "epoch": 11.132515337423312, "grad_norm": 0.2664773464202881, "learning_rate": 2.2015713902436196e-05, "loss": 0.5012158155441284, "step": 9073 }, { "epoch": 11.133742331288344, "grad_norm": 0.2492155283689499, "learning_rate": 2.2010730691244504e-05, "loss": 0.6298025250434875, "step": 9074 }, { "epoch": 11.134969325153374, "grad_norm": 0.2727627158164978, "learning_rate": 2.200574760054174e-05, "loss": 0.38819825649261475, "step": 9075 }, { "epoch": 11.136196319018405, "grad_norm": 0.27557969093322754, "learning_rate": 2.2000764630528744e-05, "loss": 0.7293592691421509, "step": 9076 }, { "epoch": 11.137423312883435, "grad_norm": 0.28709420561790466, "learning_rate": 2.199578178140637e-05, "loss": 0.5282491445541382, "step": 9077 }, { "epoch": 11.138650306748467, "grad_norm": 0.3042113780975342, "learning_rate": 2.1990799053375473e-05, "loss": 0.5626571774482727, "step": 9078 }, { "epoch": 11.139877300613497, "grad_norm": 0.31763097643852234, "learning_rate": 2.1985816446636874e-05, "loss": 0.6259374618530273, "step": 9079 }, { "epoch": 11.141104294478527, "grad_norm": 0.2549673318862915, "learning_rate": 2.1980833961391426e-05, "loss": 0.5886330604553223, "step": 9080 }, { "epoch": 11.142331288343557, "grad_norm": 0.27846401929855347, "learning_rate": 2.197585159783994e-05, "loss": 0.46363887190818787, "step": 9081 }, { "epoch": 11.14355828220859, "grad_norm": 0.2691444158554077, "learning_rate": 2.197086935618326e-05, "loss": 0.536634087562561, "step": 9082 }, { "epoch": 11.14478527607362, "grad_norm": 0.2984754741191864, "learning_rate": 2.196588723662219e-05, "loss": 0.6425041556358337, "step": 9083 }, { "epoch": 11.14601226993865, "grad_norm": 0.28303778171539307, "learning_rate": 2.1960905239357557e-05, "loss": 0.6364238262176514, "step": 9084 }, { "epoch": 11.14723926380368, "grad_norm": 0.2781103551387787, "learning_rate": 2.1955923364590167e-05, "loss": 0.7290963530540466, "step": 9085 }, { "epoch": 11.148466257668712, "grad_norm": 0.2657446563243866, "learning_rate": 2.195094161252082e-05, "loss": 0.5703967809677124, "step": 9086 }, { "epoch": 11.149693251533742, "grad_norm": 0.3108218014240265, "learning_rate": 2.194595998335032e-05, "loss": 0.7195274829864502, "step": 9087 }, { "epoch": 11.150920245398773, "grad_norm": 0.2444150149822235, "learning_rate": 2.1940978477279466e-05, "loss": 0.7368209362030029, "step": 9088 }, { "epoch": 11.152147239263805, "grad_norm": 0.2477499097585678, "learning_rate": 2.1935997094509036e-05, "loss": 0.784554123878479, "step": 9089 }, { "epoch": 11.153374233128835, "grad_norm": 0.26518407464027405, "learning_rate": 2.193101583523983e-05, "loss": 0.6913808584213257, "step": 9090 }, { "epoch": 11.154601226993865, "grad_norm": 0.3094088137149811, "learning_rate": 2.1926034699672623e-05, "loss": 0.6455200910568237, "step": 9091 }, { "epoch": 11.155828220858895, "grad_norm": 0.2419147789478302, "learning_rate": 2.1921053688008194e-05, "loss": 0.690969705581665, "step": 9092 }, { "epoch": 11.157055214723927, "grad_norm": 0.2806726098060608, "learning_rate": 2.1916072800447306e-05, "loss": 0.546153724193573, "step": 9093 }, { "epoch": 11.158282208588957, "grad_norm": 0.2628192603588104, "learning_rate": 2.1911092037190732e-05, "loss": 0.6243294477462769, "step": 9094 }, { "epoch": 11.159509202453988, "grad_norm": 0.3047947585582733, "learning_rate": 2.1906111398439223e-05, "loss": 0.585390567779541, "step": 9095 }, { "epoch": 11.160736196319018, "grad_norm": 0.22872212529182434, "learning_rate": 2.190113088439354e-05, "loss": 0.6652216911315918, "step": 9096 }, { "epoch": 11.16196319018405, "grad_norm": 0.3098469078540802, "learning_rate": 2.1896150495254437e-05, "loss": 0.5389223694801331, "step": 9097 }, { "epoch": 11.16319018404908, "grad_norm": 0.2586424648761749, "learning_rate": 2.189117023122265e-05, "loss": 0.7018965482711792, "step": 9098 }, { "epoch": 11.16441717791411, "grad_norm": 0.3246081471443176, "learning_rate": 2.188619009249893e-05, "loss": 0.5294007062911987, "step": 9099 }, { "epoch": 11.16564417177914, "grad_norm": 0.3372189700603485, "learning_rate": 2.1881210079284002e-05, "loss": 0.5372353196144104, "step": 9100 }, { "epoch": 11.166871165644173, "grad_norm": 0.3358982503414154, "learning_rate": 2.1876230191778598e-05, "loss": 0.6401948928833008, "step": 9101 }, { "epoch": 11.168098159509203, "grad_norm": 0.25320008397102356, "learning_rate": 2.1871250430183455e-05, "loss": 0.6143961548805237, "step": 9102 }, { "epoch": 11.169325153374233, "grad_norm": 0.1969994306564331, "learning_rate": 2.1866270794699273e-05, "loss": 0.46879273653030396, "step": 9103 }, { "epoch": 11.170552147239263, "grad_norm": 0.26496535539627075, "learning_rate": 2.1861291285526786e-05, "loss": 0.41326600313186646, "step": 9104 }, { "epoch": 11.171779141104295, "grad_norm": 0.22031626105308533, "learning_rate": 2.1856311902866692e-05, "loss": 0.8166029453277588, "step": 9105 }, { "epoch": 11.173006134969325, "grad_norm": 0.2950466573238373, "learning_rate": 2.1851332646919695e-05, "loss": 0.6752039790153503, "step": 9106 }, { "epoch": 11.174233128834356, "grad_norm": 0.3146372437477112, "learning_rate": 2.1846353517886508e-05, "loss": 0.6872926950454712, "step": 9107 }, { "epoch": 11.175460122699386, "grad_norm": 0.2505442798137665, "learning_rate": 2.184137451596781e-05, "loss": 0.6771498322486877, "step": 9108 }, { "epoch": 11.176687116564418, "grad_norm": 0.2950364351272583, "learning_rate": 2.1836395641364297e-05, "loss": 0.6879955530166626, "step": 9109 }, { "epoch": 11.177914110429448, "grad_norm": 0.24977608025074005, "learning_rate": 2.1831416894276653e-05, "loss": 0.761208176612854, "step": 9110 }, { "epoch": 11.179141104294478, "grad_norm": 0.32559797167778015, "learning_rate": 2.182643827490555e-05, "loss": 0.6368141174316406, "step": 9111 }, { "epoch": 11.180368098159509, "grad_norm": 0.27371954917907715, "learning_rate": 2.1821459783451675e-05, "loss": 0.6845158934593201, "step": 9112 }, { "epoch": 11.18159509202454, "grad_norm": 0.23534215986728668, "learning_rate": 2.1816481420115696e-05, "loss": 0.6029564738273621, "step": 9113 }, { "epoch": 11.18282208588957, "grad_norm": 0.24629823863506317, "learning_rate": 2.1811503185098265e-05, "loss": 0.5803016424179077, "step": 9114 }, { "epoch": 11.184049079754601, "grad_norm": 0.2466009557247162, "learning_rate": 2.1806525078600047e-05, "loss": 0.6196374893188477, "step": 9115 }, { "epoch": 11.185276073619631, "grad_norm": 0.2523120045661926, "learning_rate": 2.1801547100821698e-05, "loss": 0.7470947504043579, "step": 9116 }, { "epoch": 11.186503067484663, "grad_norm": 0.270542174577713, "learning_rate": 2.179656925196386e-05, "loss": 0.5445287227630615, "step": 9117 }, { "epoch": 11.187730061349694, "grad_norm": 0.46137768030166626, "learning_rate": 2.1791591532227184e-05, "loss": 0.40037426352500916, "step": 9118 }, { "epoch": 11.188957055214724, "grad_norm": 0.25245732069015503, "learning_rate": 2.17866139418123e-05, "loss": 0.6683305501937866, "step": 9119 }, { "epoch": 11.190184049079754, "grad_norm": 0.2724197208881378, "learning_rate": 2.178163648091984e-05, "loss": 0.6496622562408447, "step": 9120 }, { "epoch": 11.191411042944786, "grad_norm": 0.28424209356307983, "learning_rate": 2.177665914975044e-05, "loss": 0.46702712774276733, "step": 9121 }, { "epoch": 11.192638036809816, "grad_norm": 0.26112180948257446, "learning_rate": 2.1771681948504714e-05, "loss": 0.8324097394943237, "step": 9122 }, { "epoch": 11.193865030674846, "grad_norm": 0.28663623332977295, "learning_rate": 2.1766704877383277e-05, "loss": 0.5257831811904907, "step": 9123 }, { "epoch": 11.195092024539877, "grad_norm": 0.31847551465034485, "learning_rate": 2.176172793658675e-05, "loss": 0.4803134799003601, "step": 9124 }, { "epoch": 11.196319018404909, "grad_norm": 0.2737928330898285, "learning_rate": 2.1756751126315734e-05, "loss": 0.7173159122467041, "step": 9125 }, { "epoch": 11.197546012269939, "grad_norm": 0.25974828004837036, "learning_rate": 2.1751774446770828e-05, "loss": 0.572816014289856, "step": 9126 }, { "epoch": 11.198773006134969, "grad_norm": 0.28371551632881165, "learning_rate": 2.174679789815263e-05, "loss": 0.819447934627533, "step": 9127 }, { "epoch": 11.2, "grad_norm": 0.25177237391471863, "learning_rate": 2.1741821480661727e-05, "loss": 0.6358751058578491, "step": 9128 }, { "epoch": 11.201226993865031, "grad_norm": 0.2630285620689392, "learning_rate": 2.173684519449872e-05, "loss": 0.7313640117645264, "step": 9129 }, { "epoch": 11.202453987730062, "grad_norm": 0.24596473574638367, "learning_rate": 2.1731869039864162e-05, "loss": 0.7250412702560425, "step": 9130 }, { "epoch": 11.203680981595092, "grad_norm": 0.30472853779792786, "learning_rate": 2.172689301695865e-05, "loss": 0.5723587870597839, "step": 9131 }, { "epoch": 11.204907975460122, "grad_norm": 0.270215779542923, "learning_rate": 2.1721917125982743e-05, "loss": 0.5292172431945801, "step": 9132 }, { "epoch": 11.206134969325154, "grad_norm": 0.2658150792121887, "learning_rate": 2.1716941367137e-05, "loss": 0.6449109315872192, "step": 9133 }, { "epoch": 11.207361963190184, "grad_norm": 0.20869550108909607, "learning_rate": 2.1711965740621993e-05, "loss": 0.5119760036468506, "step": 9134 }, { "epoch": 11.208588957055214, "grad_norm": 0.2543264329433441, "learning_rate": 2.170699024663827e-05, "loss": 0.5334004163742065, "step": 9135 }, { "epoch": 11.209815950920245, "grad_norm": 0.25193098187446594, "learning_rate": 2.170201488538638e-05, "loss": 0.7654427886009216, "step": 9136 }, { "epoch": 11.211042944785277, "grad_norm": 0.247791588306427, "learning_rate": 2.1697039657066858e-05, "loss": 0.49739402532577515, "step": 9137 }, { "epoch": 11.212269938650307, "grad_norm": 0.2751520872116089, "learning_rate": 2.1692064561880254e-05, "loss": 0.4174189865589142, "step": 9138 }, { "epoch": 11.213496932515337, "grad_norm": 0.3976013660430908, "learning_rate": 2.168708960002709e-05, "loss": 0.522167980670929, "step": 9139 }, { "epoch": 11.214723926380367, "grad_norm": 0.3088144063949585, "learning_rate": 2.1682114771707898e-05, "loss": 0.7106155157089233, "step": 9140 }, { "epoch": 11.2159509202454, "grad_norm": 0.2597728967666626, "learning_rate": 2.1677140077123192e-05, "loss": 0.4596497416496277, "step": 9141 }, { "epoch": 11.21717791411043, "grad_norm": 0.2634933888912201, "learning_rate": 2.1672165516473494e-05, "loss": 0.6407109498977661, "step": 9142 }, { "epoch": 11.21840490797546, "grad_norm": 0.25936248898506165, "learning_rate": 2.1667191089959317e-05, "loss": 0.7897655963897705, "step": 9143 }, { "epoch": 11.21963190184049, "grad_norm": 0.22458231449127197, "learning_rate": 2.1662216797781157e-05, "loss": 0.5201795697212219, "step": 9144 }, { "epoch": 11.220858895705522, "grad_norm": 0.2561405599117279, "learning_rate": 2.1657242640139524e-05, "loss": 0.6013767719268799, "step": 9145 }, { "epoch": 11.222085889570552, "grad_norm": 0.3203468918800354, "learning_rate": 2.16522686172349e-05, "loss": 0.7114982604980469, "step": 9146 }, { "epoch": 11.223312883435582, "grad_norm": 0.4019278883934021, "learning_rate": 2.1647294729267777e-05, "loss": 0.5743290185928345, "step": 9147 }, { "epoch": 11.224539877300613, "grad_norm": 0.2732108533382416, "learning_rate": 2.1642320976438652e-05, "loss": 0.5212929248809814, "step": 9148 }, { "epoch": 11.225766871165645, "grad_norm": 0.22142532467842102, "learning_rate": 2.1637347358947984e-05, "loss": 0.751151442527771, "step": 9149 }, { "epoch": 11.226993865030675, "grad_norm": 0.3170070946216583, "learning_rate": 2.163237387699626e-05, "loss": 0.43013790249824524, "step": 9150 }, { "epoch": 11.228220858895705, "grad_norm": 0.3375020921230316, "learning_rate": 2.1627400530783932e-05, "loss": 0.7055546045303345, "step": 9151 }, { "epoch": 11.229447852760735, "grad_norm": 0.2687617540359497, "learning_rate": 2.1622427320511472e-05, "loss": 0.7552393674850464, "step": 9152 }, { "epoch": 11.230674846625767, "grad_norm": 0.25720980763435364, "learning_rate": 2.1617454246379338e-05, "loss": 0.7517626881599426, "step": 9153 }, { "epoch": 11.231901840490798, "grad_norm": 0.26661354303359985, "learning_rate": 2.1612481308587972e-05, "loss": 0.5610179901123047, "step": 9154 }, { "epoch": 11.233128834355828, "grad_norm": 0.28532281517982483, "learning_rate": 2.160750850733782e-05, "loss": 0.683078944683075, "step": 9155 }, { "epoch": 11.23435582822086, "grad_norm": 0.24140575528144836, "learning_rate": 2.1602535842829324e-05, "loss": 0.7721619606018066, "step": 9156 }, { "epoch": 11.23558282208589, "grad_norm": 0.2756720781326294, "learning_rate": 2.1597563315262926e-05, "loss": 0.685316264629364, "step": 9157 }, { "epoch": 11.23680981595092, "grad_norm": 0.21699129045009613, "learning_rate": 2.159259092483904e-05, "loss": 0.6362005472183228, "step": 9158 }, { "epoch": 11.23803680981595, "grad_norm": 0.3130972981452942, "learning_rate": 2.1587618671758102e-05, "loss": 0.45644229650497437, "step": 9159 }, { "epoch": 11.239263803680982, "grad_norm": 0.3035281300544739, "learning_rate": 2.158264655622052e-05, "loss": 0.6643524765968323, "step": 9160 }, { "epoch": 11.240490797546013, "grad_norm": 0.29074928164482117, "learning_rate": 2.1577674578426705e-05, "loss": 0.5224682092666626, "step": 9161 }, { "epoch": 11.241717791411043, "grad_norm": 0.24040095508098602, "learning_rate": 2.1572702738577073e-05, "loss": 0.7670859694480896, "step": 9162 }, { "epoch": 11.242944785276073, "grad_norm": 0.3231178820133209, "learning_rate": 2.1567731036872017e-05, "loss": 0.5377202033996582, "step": 9163 }, { "epoch": 11.244171779141105, "grad_norm": 0.22918707132339478, "learning_rate": 2.1562759473511933e-05, "loss": 0.6332575082778931, "step": 9164 }, { "epoch": 11.245398773006135, "grad_norm": 0.35016292333602905, "learning_rate": 2.155778804869721e-05, "loss": 0.44745826721191406, "step": 9165 }, { "epoch": 11.246625766871166, "grad_norm": 0.29133254289627075, "learning_rate": 2.1552816762628233e-05, "loss": 0.6555262804031372, "step": 9166 }, { "epoch": 11.247852760736196, "grad_norm": 0.27214106917381287, "learning_rate": 2.154784561550539e-05, "loss": 0.6843064427375793, "step": 9167 }, { "epoch": 11.249079754601228, "grad_norm": 0.31438031792640686, "learning_rate": 2.154287460752903e-05, "loss": 0.5932817459106445, "step": 9168 }, { "epoch": 11.250306748466258, "grad_norm": 0.23930111527442932, "learning_rate": 2.1537903738899546e-05, "loss": 0.4243021607398987, "step": 9169 }, { "epoch": 11.251533742331288, "grad_norm": 0.3798765540122986, "learning_rate": 2.1532933009817283e-05, "loss": 0.4414556622505188, "step": 9170 }, { "epoch": 11.252760736196318, "grad_norm": 0.2414400279521942, "learning_rate": 2.15279624204826e-05, "loss": 0.6050628423690796, "step": 9171 }, { "epoch": 11.25398773006135, "grad_norm": 0.2560284733772278, "learning_rate": 2.1522991971095853e-05, "loss": 0.5631875991821289, "step": 9172 }, { "epoch": 11.25521472392638, "grad_norm": 0.2572903037071228, "learning_rate": 2.151802166185738e-05, "loss": 0.7015266418457031, "step": 9173 }, { "epoch": 11.256441717791411, "grad_norm": 0.23446127772331238, "learning_rate": 2.1513051492967522e-05, "loss": 0.4857363700866699, "step": 9174 }, { "epoch": 11.257668711656441, "grad_norm": 0.2979412376880646, "learning_rate": 2.150808146462662e-05, "loss": 0.4821474552154541, "step": 9175 }, { "epoch": 11.258895705521473, "grad_norm": 0.36776334047317505, "learning_rate": 2.1503111577034986e-05, "loss": 0.4788132309913635, "step": 9176 }, { "epoch": 11.260122699386503, "grad_norm": 0.2509695887565613, "learning_rate": 2.149814183039296e-05, "loss": 0.6262782216072083, "step": 9177 }, { "epoch": 11.261349693251534, "grad_norm": 0.2947821617126465, "learning_rate": 2.149317222490083e-05, "loss": 0.8810127973556519, "step": 9178 }, { "epoch": 11.262576687116564, "grad_norm": 0.266243040561676, "learning_rate": 2.1488202760758946e-05, "loss": 0.5950149297714233, "step": 9179 }, { "epoch": 11.263803680981596, "grad_norm": 0.2558724284172058, "learning_rate": 2.1483233438167584e-05, "loss": 0.6493290662765503, "step": 9180 }, { "epoch": 11.265030674846626, "grad_norm": 0.27963149547576904, "learning_rate": 2.147826425732706e-05, "loss": 0.5597904920578003, "step": 9181 }, { "epoch": 11.266257668711656, "grad_norm": 0.2783701717853546, "learning_rate": 2.1473295218437654e-05, "loss": 0.7982784509658813, "step": 9182 }, { "epoch": 11.267484662576686, "grad_norm": 0.23584426939487457, "learning_rate": 2.1468326321699654e-05, "loss": 0.6566935777664185, "step": 9183 }, { "epoch": 11.268711656441718, "grad_norm": 0.25881779193878174, "learning_rate": 2.1463357567313358e-05, "loss": 0.5182202458381653, "step": 9184 }, { "epoch": 11.269938650306749, "grad_norm": 0.3241281509399414, "learning_rate": 2.1458388955479024e-05, "loss": 0.7875673770904541, "step": 9185 }, { "epoch": 11.271165644171779, "grad_norm": 0.2774723470211029, "learning_rate": 2.145342048639694e-05, "loss": 0.5611754655838013, "step": 9186 }, { "epoch": 11.27239263803681, "grad_norm": 0.31528693437576294, "learning_rate": 2.1448452160267353e-05, "loss": 0.5772771835327148, "step": 9187 }, { "epoch": 11.273619631901841, "grad_norm": 0.2636864185333252, "learning_rate": 2.1443483977290527e-05, "loss": 0.6353883743286133, "step": 9188 }, { "epoch": 11.274846625766871, "grad_norm": 0.26018235087394714, "learning_rate": 2.1438515937666725e-05, "loss": 0.5647826194763184, "step": 9189 }, { "epoch": 11.276073619631902, "grad_norm": 0.217678502202034, "learning_rate": 2.1433548041596184e-05, "loss": 0.6434089541435242, "step": 9190 }, { "epoch": 11.277300613496932, "grad_norm": 0.4262967109680176, "learning_rate": 2.1428580289279155e-05, "loss": 0.7908389568328857, "step": 9191 }, { "epoch": 11.278527607361964, "grad_norm": 0.24630562961101532, "learning_rate": 2.142361268091586e-05, "loss": 0.7925242781639099, "step": 9192 }, { "epoch": 11.279754601226994, "grad_norm": 0.22115804255008698, "learning_rate": 2.1418645216706544e-05, "loss": 0.8465285301208496, "step": 9193 }, { "epoch": 11.280981595092024, "grad_norm": 0.31113114953041077, "learning_rate": 2.1413677896851426e-05, "loss": 0.6522484421730042, "step": 9194 }, { "epoch": 11.282208588957054, "grad_norm": 0.2436295747756958, "learning_rate": 2.1408710721550716e-05, "loss": 0.7290533781051636, "step": 9195 }, { "epoch": 11.283435582822086, "grad_norm": 0.2751355469226837, "learning_rate": 2.1403743691004644e-05, "loss": 0.6594877243041992, "step": 9196 }, { "epoch": 11.284662576687117, "grad_norm": 0.2134067416191101, "learning_rate": 2.13987768054134e-05, "loss": 0.5473116040229797, "step": 9197 }, { "epoch": 11.285889570552147, "grad_norm": 0.22654104232788086, "learning_rate": 2.139381006497719e-05, "loss": 0.7765347957611084, "step": 9198 }, { "epoch": 11.287116564417177, "grad_norm": 0.28542134165763855, "learning_rate": 2.138884346989622e-05, "loss": 0.7355608344078064, "step": 9199 }, { "epoch": 11.28834355828221, "grad_norm": 0.250336617231369, "learning_rate": 2.138387702037066e-05, "loss": 0.7034257650375366, "step": 9200 }, { "epoch": 11.28957055214724, "grad_norm": 0.3108564615249634, "learning_rate": 2.1378910716600702e-05, "loss": 0.6572414040565491, "step": 9201 }, { "epoch": 11.29079754601227, "grad_norm": 0.2665320634841919, "learning_rate": 2.1373944558786533e-05, "loss": 0.42565029859542847, "step": 9202 }, { "epoch": 11.2920245398773, "grad_norm": 0.2875511646270752, "learning_rate": 2.136897854712832e-05, "loss": 0.7121877074241638, "step": 9203 }, { "epoch": 11.293251533742332, "grad_norm": 0.23339740931987762, "learning_rate": 2.136401268182622e-05, "loss": 0.44331973791122437, "step": 9204 }, { "epoch": 11.294478527607362, "grad_norm": 0.2430707812309265, "learning_rate": 2.1359046963080404e-05, "loss": 0.7006173133850098, "step": 9205 }, { "epoch": 11.295705521472392, "grad_norm": 0.2851322591304779, "learning_rate": 2.1354081391091013e-05, "loss": 0.5516895651817322, "step": 9206 }, { "epoch": 11.296932515337422, "grad_norm": 0.34032195806503296, "learning_rate": 2.1349115966058206e-05, "loss": 0.4557649791240692, "step": 9207 }, { "epoch": 11.298159509202454, "grad_norm": 0.26027652621269226, "learning_rate": 2.1344150688182128e-05, "loss": 0.6550487875938416, "step": 9208 }, { "epoch": 11.299386503067485, "grad_norm": 0.3015492260456085, "learning_rate": 2.13391855576629e-05, "loss": 0.7031430006027222, "step": 9209 }, { "epoch": 11.300613496932515, "grad_norm": 0.2538799047470093, "learning_rate": 2.1334220574700668e-05, "loss": 0.5827081203460693, "step": 9210 }, { "epoch": 11.301840490797545, "grad_norm": 0.23894837498664856, "learning_rate": 2.1329255739495544e-05, "loss": 0.7972933650016785, "step": 9211 }, { "epoch": 11.303067484662577, "grad_norm": 0.24235662817955017, "learning_rate": 2.1324291052247652e-05, "loss": 0.6182185411453247, "step": 9212 }, { "epoch": 11.304294478527607, "grad_norm": 0.22573411464691162, "learning_rate": 2.1319326513157112e-05, "loss": 0.688136637210846, "step": 9213 }, { "epoch": 11.305521472392638, "grad_norm": 0.24790041148662567, "learning_rate": 2.1314362122424013e-05, "loss": 0.6974856853485107, "step": 9214 }, { "epoch": 11.30674846625767, "grad_norm": 0.2539604604244232, "learning_rate": 2.1309397880248475e-05, "loss": 0.6436777710914612, "step": 9215 }, { "epoch": 11.3079754601227, "grad_norm": 0.2864295542240143, "learning_rate": 2.1304433786830574e-05, "loss": 0.6612005233764648, "step": 9216 }, { "epoch": 11.30920245398773, "grad_norm": 0.3315511643886566, "learning_rate": 2.1299469842370407e-05, "loss": 0.6390516757965088, "step": 9217 }, { "epoch": 11.31042944785276, "grad_norm": 0.2641281187534332, "learning_rate": 2.1294506047068063e-05, "loss": 0.5486791133880615, "step": 9218 }, { "epoch": 11.31165644171779, "grad_norm": 0.25894895195961, "learning_rate": 2.1289542401123608e-05, "loss": 0.6120495796203613, "step": 9219 }, { "epoch": 11.312883435582823, "grad_norm": 0.3045806586742401, "learning_rate": 2.1284578904737117e-05, "loss": 0.7501691579818726, "step": 9220 }, { "epoch": 11.314110429447853, "grad_norm": 0.23031899333000183, "learning_rate": 2.127961555810865e-05, "loss": 0.8537334203720093, "step": 9221 }, { "epoch": 11.315337423312883, "grad_norm": 0.24807745218276978, "learning_rate": 2.1274652361438266e-05, "loss": 0.7749733924865723, "step": 9222 }, { "epoch": 11.316564417177915, "grad_norm": 0.2781844139099121, "learning_rate": 2.1269689314926024e-05, "loss": 0.574722170829773, "step": 9223 }, { "epoch": 11.317791411042945, "grad_norm": 0.23043450713157654, "learning_rate": 2.1264726418771972e-05, "loss": 0.7842393517494202, "step": 9224 }, { "epoch": 11.319018404907975, "grad_norm": 0.22344814240932465, "learning_rate": 2.125976367317614e-05, "loss": 0.6924422383308411, "step": 9225 }, { "epoch": 11.320245398773006, "grad_norm": 0.2629704475402832, "learning_rate": 2.1254801078338565e-05, "loss": 0.5638394355773926, "step": 9226 }, { "epoch": 11.321472392638038, "grad_norm": 0.2714121341705322, "learning_rate": 2.1249838634459284e-05, "loss": 0.7319701910018921, "step": 9227 }, { "epoch": 11.322699386503068, "grad_norm": 0.33443018794059753, "learning_rate": 2.1244876341738307e-05, "loss": 0.5992929339408875, "step": 9228 }, { "epoch": 11.323926380368098, "grad_norm": 0.29853692650794983, "learning_rate": 2.123991420037565e-05, "loss": 0.791219174861908, "step": 9229 }, { "epoch": 11.325153374233128, "grad_norm": 0.33266115188598633, "learning_rate": 2.123495221057134e-05, "loss": 0.4092864394187927, "step": 9230 }, { "epoch": 11.32638036809816, "grad_norm": 0.2690590023994446, "learning_rate": 2.122999037252536e-05, "loss": 0.6261805295944214, "step": 9231 }, { "epoch": 11.32760736196319, "grad_norm": 0.28607267141342163, "learning_rate": 2.122502868643772e-05, "loss": 0.599034309387207, "step": 9232 }, { "epoch": 11.32883435582822, "grad_norm": 0.2833842933177948, "learning_rate": 2.12200671525084e-05, "loss": 0.74826580286026, "step": 9233 }, { "epoch": 11.330061349693251, "grad_norm": 0.22526316344738007, "learning_rate": 2.1215105770937398e-05, "loss": 0.7050357460975647, "step": 9234 }, { "epoch": 11.331288343558283, "grad_norm": 0.3140193819999695, "learning_rate": 2.1210144541924686e-05, "loss": 0.5462483167648315, "step": 9235 }, { "epoch": 11.332515337423313, "grad_norm": 0.3922409117221832, "learning_rate": 2.1205183465670235e-05, "loss": 0.7378919124603271, "step": 9236 }, { "epoch": 11.333742331288343, "grad_norm": 0.26861560344696045, "learning_rate": 2.1200222542374025e-05, "loss": 0.6075173616409302, "step": 9237 }, { "epoch": 11.334969325153374, "grad_norm": 0.25649112462997437, "learning_rate": 2.1195261772236e-05, "loss": 0.6320869326591492, "step": 9238 }, { "epoch": 11.336196319018406, "grad_norm": 0.32649311423301697, "learning_rate": 2.1190301155456118e-05, "loss": 0.5795181393623352, "step": 9239 }, { "epoch": 11.337423312883436, "grad_norm": 0.26079052686691284, "learning_rate": 2.118534069223434e-05, "loss": 0.6183387041091919, "step": 9240 }, { "epoch": 11.338650306748466, "grad_norm": 0.23849034309387207, "learning_rate": 2.1180380382770594e-05, "loss": 0.5337188243865967, "step": 9241 }, { "epoch": 11.339877300613496, "grad_norm": 0.2693663239479065, "learning_rate": 2.1175420227264824e-05, "loss": 0.6478089094161987, "step": 9242 }, { "epoch": 11.341104294478528, "grad_norm": 0.32237523794174194, "learning_rate": 2.117046022591695e-05, "loss": 0.6054697632789612, "step": 9243 }, { "epoch": 11.342331288343559, "grad_norm": 0.2827628552913666, "learning_rate": 2.1165500378926897e-05, "loss": 0.6773173809051514, "step": 9244 }, { "epoch": 11.343558282208589, "grad_norm": 0.29086819291114807, "learning_rate": 2.1160540686494594e-05, "loss": 0.5585677027702332, "step": 9245 }, { "epoch": 11.344785276073619, "grad_norm": 0.2418271005153656, "learning_rate": 2.1155581148819954e-05, "loss": 0.4596419334411621, "step": 9246 }, { "epoch": 11.346012269938651, "grad_norm": 0.2579409182071686, "learning_rate": 2.1150621766102864e-05, "loss": 0.6026814579963684, "step": 9247 }, { "epoch": 11.347239263803681, "grad_norm": 0.28749850392341614, "learning_rate": 2.1145662538543232e-05, "loss": 0.8425757884979248, "step": 9248 }, { "epoch": 11.348466257668711, "grad_norm": 0.23479041457176208, "learning_rate": 2.1140703466340956e-05, "loss": 0.611431360244751, "step": 9249 }, { "epoch": 11.349693251533742, "grad_norm": 0.2736389636993408, "learning_rate": 2.113574454969591e-05, "loss": 0.6021729707717896, "step": 9250 }, { "epoch": 11.350920245398774, "grad_norm": 0.2560933828353882, "learning_rate": 2.1130785788807987e-05, "loss": 0.7417241334915161, "step": 9251 }, { "epoch": 11.352147239263804, "grad_norm": 0.26884958148002625, "learning_rate": 2.1125827183877047e-05, "loss": 0.6459250450134277, "step": 9252 }, { "epoch": 11.353374233128834, "grad_norm": 0.23363390564918518, "learning_rate": 2.1120868735102963e-05, "loss": 0.5142704248428345, "step": 9253 }, { "epoch": 11.354601226993864, "grad_norm": 0.25225621461868286, "learning_rate": 2.1115910442685605e-05, "loss": 0.5168533325195312, "step": 9254 }, { "epoch": 11.355828220858896, "grad_norm": 0.2950744926929474, "learning_rate": 2.1110952306824814e-05, "loss": 0.5995396375656128, "step": 9255 }, { "epoch": 11.357055214723927, "grad_norm": 0.2871899902820587, "learning_rate": 2.1105994327720447e-05, "loss": 0.5647052526473999, "step": 9256 }, { "epoch": 11.358282208588957, "grad_norm": 0.28877803683280945, "learning_rate": 2.1101036505572335e-05, "loss": 0.7830547094345093, "step": 9257 }, { "epoch": 11.359509202453987, "grad_norm": 0.27421504259109497, "learning_rate": 2.1096078840580324e-05, "loss": 0.6627039909362793, "step": 9258 }, { "epoch": 11.360736196319019, "grad_norm": 0.25713008642196655, "learning_rate": 2.1091121332944246e-05, "loss": 0.61788010597229, "step": 9259 }, { "epoch": 11.36196319018405, "grad_norm": 0.24106240272521973, "learning_rate": 2.1086163982863913e-05, "loss": 0.6774764060974121, "step": 9260 }, { "epoch": 11.36319018404908, "grad_norm": 0.29415374994277954, "learning_rate": 2.1081206790539154e-05, "loss": 0.6559942960739136, "step": 9261 }, { "epoch": 11.36441717791411, "grad_norm": 0.27793142199516296, "learning_rate": 2.1076249756169765e-05, "loss": 0.717361569404602, "step": 9262 }, { "epoch": 11.365644171779142, "grad_norm": 0.2690800428390503, "learning_rate": 2.1071292879955557e-05, "loss": 0.877000093460083, "step": 9263 }, { "epoch": 11.366871165644172, "grad_norm": 0.30013740062713623, "learning_rate": 2.1066336162096333e-05, "loss": 0.5163531303405762, "step": 9264 }, { "epoch": 11.368098159509202, "grad_norm": 0.224049374461174, "learning_rate": 2.106137960279188e-05, "loss": 0.7091145515441895, "step": 9265 }, { "epoch": 11.369325153374232, "grad_norm": 0.23894351720809937, "learning_rate": 2.1056423202241978e-05, "loss": 0.6131893396377563, "step": 9266 }, { "epoch": 11.370552147239264, "grad_norm": 0.3142900764942169, "learning_rate": 2.10514669606464e-05, "loss": 0.6016556024551392, "step": 9267 }, { "epoch": 11.371779141104295, "grad_norm": 0.3060808479785919, "learning_rate": 2.104651087820494e-05, "loss": 0.7474495768547058, "step": 9268 }, { "epoch": 11.373006134969325, "grad_norm": 0.2491709589958191, "learning_rate": 2.1041554955117346e-05, "loss": 0.5687295794487, "step": 9269 }, { "epoch": 11.374233128834355, "grad_norm": 0.30069079995155334, "learning_rate": 2.103659919158339e-05, "loss": 0.6761554479598999, "step": 9270 }, { "epoch": 11.375460122699387, "grad_norm": 0.277439683675766, "learning_rate": 2.1031643587802808e-05, "loss": 0.48275893926620483, "step": 9271 }, { "epoch": 11.376687116564417, "grad_norm": 0.2822880148887634, "learning_rate": 2.1026688143975354e-05, "loss": 0.767430305480957, "step": 9272 }, { "epoch": 11.377914110429447, "grad_norm": 0.25566214323043823, "learning_rate": 2.1021732860300776e-05, "loss": 0.7424089312553406, "step": 9273 }, { "epoch": 11.379141104294478, "grad_norm": 0.3396860361099243, "learning_rate": 2.1016777736978796e-05, "loss": 0.48986244201660156, "step": 9274 }, { "epoch": 11.38036809815951, "grad_norm": 0.2601091265678406, "learning_rate": 2.101182277420915e-05, "loss": 0.573766827583313, "step": 9275 }, { "epoch": 11.38159509202454, "grad_norm": 0.2376060038805008, "learning_rate": 2.1006867972191545e-05, "loss": 0.8067672252655029, "step": 9276 }, { "epoch": 11.38282208588957, "grad_norm": 0.26791268587112427, "learning_rate": 2.1001913331125706e-05, "loss": 0.58965003490448, "step": 9277 }, { "epoch": 11.3840490797546, "grad_norm": 0.26735419034957886, "learning_rate": 2.099695885121134e-05, "loss": 0.6912914514541626, "step": 9278 }, { "epoch": 11.385276073619632, "grad_norm": 0.2865115702152252, "learning_rate": 2.0992004532648144e-05, "loss": 0.8346737623214722, "step": 9279 }, { "epoch": 11.386503067484663, "grad_norm": 0.2534911334514618, "learning_rate": 2.0987050375635813e-05, "loss": 0.4303930103778839, "step": 9280 }, { "epoch": 11.387730061349693, "grad_norm": 0.2369018942117691, "learning_rate": 2.0982096380374042e-05, "loss": 0.6808634996414185, "step": 9281 }, { "epoch": 11.388957055214725, "grad_norm": 0.2542942762374878, "learning_rate": 2.09771425470625e-05, "loss": 0.6568490266799927, "step": 9282 }, { "epoch": 11.390184049079755, "grad_norm": 0.3267168700695038, "learning_rate": 2.0972188875900873e-05, "loss": 0.5630606412887573, "step": 9283 }, { "epoch": 11.391411042944785, "grad_norm": 0.2146521806716919, "learning_rate": 2.096723536708882e-05, "loss": 0.6566811800003052, "step": 9284 }, { "epoch": 11.392638036809815, "grad_norm": 0.28005707263946533, "learning_rate": 2.0962282020826008e-05, "loss": 0.47483348846435547, "step": 9285 }, { "epoch": 11.393865030674847, "grad_norm": 0.290976881980896, "learning_rate": 2.0957328837312096e-05, "loss": 0.7146531343460083, "step": 9286 }, { "epoch": 11.395092024539878, "grad_norm": 0.2283306121826172, "learning_rate": 2.0952375816746726e-05, "loss": 0.6466717720031738, "step": 9287 }, { "epoch": 11.396319018404908, "grad_norm": 0.2656235098838806, "learning_rate": 2.0947422959329545e-05, "loss": 0.8077762722969055, "step": 9288 }, { "epoch": 11.397546012269938, "grad_norm": 0.32301753759384155, "learning_rate": 2.094247026526017e-05, "loss": 0.6744660139083862, "step": 9289 }, { "epoch": 11.39877300613497, "grad_norm": 0.23475876450538635, "learning_rate": 2.0937517734738266e-05, "loss": 0.7363082766532898, "step": 9290 }, { "epoch": 11.4, "grad_norm": 0.28060171008110046, "learning_rate": 2.093256536796343e-05, "loss": 0.8129966259002686, "step": 9291 }, { "epoch": 11.40122699386503, "grad_norm": 0.3598639965057373, "learning_rate": 2.0927613165135285e-05, "loss": 0.563295841217041, "step": 9292 }, { "epoch": 11.40245398773006, "grad_norm": 0.3481692373752594, "learning_rate": 2.0922661126453432e-05, "loss": 0.5832909345626831, "step": 9293 }, { "epoch": 11.403680981595093, "grad_norm": 0.222321018576622, "learning_rate": 2.0917709252117486e-05, "loss": 0.6755556464195251, "step": 9294 }, { "epoch": 11.404907975460123, "grad_norm": 0.2614010274410248, "learning_rate": 2.0912757542327037e-05, "loss": 0.6870561242103577, "step": 9295 }, { "epoch": 11.406134969325153, "grad_norm": 0.26699283719062805, "learning_rate": 2.090780599728167e-05, "loss": 0.7567181587219238, "step": 9296 }, { "epoch": 11.407361963190183, "grad_norm": 0.28052979707717896, "learning_rate": 2.090285461718098e-05, "loss": 0.7375632524490356, "step": 9297 }, { "epoch": 11.408588957055215, "grad_norm": 0.23669423162937164, "learning_rate": 2.089790340222453e-05, "loss": 0.8789891004562378, "step": 9298 }, { "epoch": 11.409815950920246, "grad_norm": 0.27676159143447876, "learning_rate": 2.0892952352611888e-05, "loss": 0.6745187044143677, "step": 9299 }, { "epoch": 11.411042944785276, "grad_norm": 0.32098671793937683, "learning_rate": 2.0888001468542634e-05, "loss": 0.7403053045272827, "step": 9300 }, { "epoch": 11.412269938650306, "grad_norm": 0.2332574874162674, "learning_rate": 2.0883050750216305e-05, "loss": 0.6990491151809692, "step": 9301 }, { "epoch": 11.413496932515338, "grad_norm": 0.23323215544223785, "learning_rate": 2.0878100197832465e-05, "loss": 0.7025337219238281, "step": 9302 }, { "epoch": 11.414723926380368, "grad_norm": 0.24854879081249237, "learning_rate": 2.0873149811590645e-05, "loss": 0.6605219841003418, "step": 9303 }, { "epoch": 11.415950920245399, "grad_norm": 0.335536390542984, "learning_rate": 2.0868199591690385e-05, "loss": 0.4143550395965576, "step": 9304 }, { "epoch": 11.417177914110429, "grad_norm": 0.2926720082759857, "learning_rate": 2.086324953833122e-05, "loss": 0.6593688726425171, "step": 9305 }, { "epoch": 11.41840490797546, "grad_norm": 0.2697620093822479, "learning_rate": 2.0858299651712666e-05, "loss": 0.5539454221725464, "step": 9306 }, { "epoch": 11.419631901840491, "grad_norm": 0.23755031824111938, "learning_rate": 2.085334993203424e-05, "loss": 0.6078875660896301, "step": 9307 }, { "epoch": 11.420858895705521, "grad_norm": 0.2222813367843628, "learning_rate": 2.084840037949545e-05, "loss": 0.5549465417861938, "step": 9308 }, { "epoch": 11.422085889570551, "grad_norm": 0.31110548973083496, "learning_rate": 2.08434509942958e-05, "loss": 0.7049819231033325, "step": 9309 }, { "epoch": 11.423312883435583, "grad_norm": 0.3282654881477356, "learning_rate": 2.083850177663479e-05, "loss": 0.7486289739608765, "step": 9310 }, { "epoch": 11.424539877300614, "grad_norm": 0.2366526573896408, "learning_rate": 2.0833552726711895e-05, "loss": 0.7029298543930054, "step": 9311 }, { "epoch": 11.425766871165644, "grad_norm": 0.28903377056121826, "learning_rate": 2.082860384472661e-05, "loss": 0.5746320486068726, "step": 9312 }, { "epoch": 11.426993865030674, "grad_norm": 0.3120630085468292, "learning_rate": 2.0823655130878407e-05, "loss": 0.7451267242431641, "step": 9313 }, { "epoch": 11.428220858895706, "grad_norm": 0.2894720733165741, "learning_rate": 2.081870658536676e-05, "loss": 0.8203052282333374, "step": 9314 }, { "epoch": 11.429447852760736, "grad_norm": 0.26367342472076416, "learning_rate": 2.081375820839112e-05, "loss": 0.660153865814209, "step": 9315 }, { "epoch": 11.430674846625767, "grad_norm": 0.28386515378952026, "learning_rate": 2.0808810000150955e-05, "loss": 0.5592660307884216, "step": 9316 }, { "epoch": 11.431901840490797, "grad_norm": 0.2902384400367737, "learning_rate": 2.0803861960845697e-05, "loss": 0.7093340754508972, "step": 9317 }, { "epoch": 11.433128834355829, "grad_norm": 0.28867578506469727, "learning_rate": 2.07989140906748e-05, "loss": 0.6562115550041199, "step": 9318 }, { "epoch": 11.434355828220859, "grad_norm": 0.3371717631816864, "learning_rate": 2.0793966389837693e-05, "loss": 0.440122127532959, "step": 9319 }, { "epoch": 11.43558282208589, "grad_norm": 0.3462318778038025, "learning_rate": 2.07890188585338e-05, "loss": 0.6258636713027954, "step": 9320 }, { "epoch": 11.43680981595092, "grad_norm": 0.3143799901008606, "learning_rate": 2.0784071496962555e-05, "loss": 0.6245794892311096, "step": 9321 }, { "epoch": 11.438036809815952, "grad_norm": 0.2836146056652069, "learning_rate": 2.0779124305323358e-05, "loss": 0.6963793635368347, "step": 9322 }, { "epoch": 11.439263803680982, "grad_norm": 0.2573850154876709, "learning_rate": 2.077417728381562e-05, "loss": 0.7303940653800964, "step": 9323 }, { "epoch": 11.440490797546012, "grad_norm": 0.2672276794910431, "learning_rate": 2.0769230432638754e-05, "loss": 0.7165666818618774, "step": 9324 }, { "epoch": 11.441717791411042, "grad_norm": 0.3516356647014618, "learning_rate": 2.076428375199213e-05, "loss": 0.2815270721912384, "step": 9325 }, { "epoch": 11.442944785276074, "grad_norm": 0.24415305256843567, "learning_rate": 2.0759337242075153e-05, "loss": 0.5833038687705994, "step": 9326 }, { "epoch": 11.444171779141104, "grad_norm": 0.22807732224464417, "learning_rate": 2.07543909030872e-05, "loss": 0.5630716681480408, "step": 9327 }, { "epoch": 11.445398773006135, "grad_norm": 0.2959900200366974, "learning_rate": 2.0749444735227636e-05, "loss": 0.6766860485076904, "step": 9328 }, { "epoch": 11.446625766871165, "grad_norm": 0.3455599248409271, "learning_rate": 2.0744498738695836e-05, "loss": 0.5613816380500793, "step": 9329 }, { "epoch": 11.447852760736197, "grad_norm": 0.3055769205093384, "learning_rate": 2.0739552913691153e-05, "loss": 0.6927261352539062, "step": 9330 }, { "epoch": 11.449079754601227, "grad_norm": 0.3069654107093811, "learning_rate": 2.0734607260412936e-05, "loss": 0.5159952640533447, "step": 9331 }, { "epoch": 11.450306748466257, "grad_norm": 0.3390536904335022, "learning_rate": 2.0729661779060545e-05, "loss": 0.6169635653495789, "step": 9332 }, { "epoch": 11.451533742331288, "grad_norm": 0.3343023359775543, "learning_rate": 2.0724716469833294e-05, "loss": 0.4426761269569397, "step": 9333 }, { "epoch": 11.45276073619632, "grad_norm": 0.25798624753952026, "learning_rate": 2.0719771332930533e-05, "loss": 0.6024259328842163, "step": 9334 }, { "epoch": 11.45398773006135, "grad_norm": 0.28668543696403503, "learning_rate": 2.0714826368551586e-05, "loss": 0.6733055114746094, "step": 9335 }, { "epoch": 11.45521472392638, "grad_norm": 0.253031462430954, "learning_rate": 2.0709881576895768e-05, "loss": 0.47961270809173584, "step": 9336 }, { "epoch": 11.45644171779141, "grad_norm": 0.2555457353591919, "learning_rate": 2.0704936958162387e-05, "loss": 0.5329340696334839, "step": 9337 }, { "epoch": 11.457668711656442, "grad_norm": 0.26150017976760864, "learning_rate": 2.069999251255075e-05, "loss": 0.7413214445114136, "step": 9338 }, { "epoch": 11.458895705521472, "grad_norm": 0.2563067674636841, "learning_rate": 2.0695048240260146e-05, "loss": 0.7504047155380249, "step": 9339 }, { "epoch": 11.460122699386503, "grad_norm": 0.26972439885139465, "learning_rate": 2.0690104141489865e-05, "loss": 0.7072376608848572, "step": 9340 }, { "epoch": 11.461349693251535, "grad_norm": 0.2950791120529175, "learning_rate": 2.0685160216439202e-05, "loss": 0.6976925730705261, "step": 9341 }, { "epoch": 11.462576687116565, "grad_norm": 0.22856341302394867, "learning_rate": 2.0680216465307422e-05, "loss": 0.5126458406448364, "step": 9342 }, { "epoch": 11.463803680981595, "grad_norm": 0.26830539107322693, "learning_rate": 2.06752728882938e-05, "loss": 0.7315611839294434, "step": 9343 }, { "epoch": 11.465030674846625, "grad_norm": 0.2752731144428253, "learning_rate": 2.0670329485597585e-05, "loss": 0.45842957496643066, "step": 9344 }, { "epoch": 11.466257668711656, "grad_norm": 0.2403196543455124, "learning_rate": 2.0665386257418045e-05, "loss": 0.7759658694267273, "step": 9345 }, { "epoch": 11.467484662576688, "grad_norm": 0.2990628480911255, "learning_rate": 2.0660443203954425e-05, "loss": 0.6212067604064941, "step": 9346 }, { "epoch": 11.468711656441718, "grad_norm": 0.33326271176338196, "learning_rate": 2.0655500325405958e-05, "loss": 0.5586956739425659, "step": 9347 }, { "epoch": 11.469938650306748, "grad_norm": 0.25740453600883484, "learning_rate": 2.0650557621971883e-05, "loss": 0.5410353541374207, "step": 9348 }, { "epoch": 11.47116564417178, "grad_norm": 0.2282267063856125, "learning_rate": 2.0645615093851425e-05, "loss": 0.7109839916229248, "step": 9349 }, { "epoch": 11.47239263803681, "grad_norm": 0.3413946032524109, "learning_rate": 2.06406727412438e-05, "loss": 0.8395364284515381, "step": 9350 }, { "epoch": 11.47361963190184, "grad_norm": 0.373929888010025, "learning_rate": 2.0635730564348232e-05, "loss": 0.604036271572113, "step": 9351 }, { "epoch": 11.47484662576687, "grad_norm": 0.2902483344078064, "learning_rate": 2.0630788563363913e-05, "loss": 0.6180020570755005, "step": 9352 }, { "epoch": 11.476073619631903, "grad_norm": 0.28216302394866943, "learning_rate": 2.0625846738490047e-05, "loss": 0.5897584557533264, "step": 9353 }, { "epoch": 11.477300613496933, "grad_norm": 0.28890788555145264, "learning_rate": 2.062090508992582e-05, "loss": 0.4799988865852356, "step": 9354 }, { "epoch": 11.478527607361963, "grad_norm": 0.24473950266838074, "learning_rate": 2.0615963617870415e-05, "loss": 0.6598401069641113, "step": 9355 }, { "epoch": 11.479754601226993, "grad_norm": 0.3592380881309509, "learning_rate": 2.0611022322523017e-05, "loss": 0.44731467962265015, "step": 9356 }, { "epoch": 11.480981595092025, "grad_norm": 0.2730323076248169, "learning_rate": 2.0606081204082797e-05, "loss": 0.6205602288246155, "step": 9357 }, { "epoch": 11.482208588957056, "grad_norm": 0.2626747190952301, "learning_rate": 2.0601140262748906e-05, "loss": 0.7281952500343323, "step": 9358 }, { "epoch": 11.483435582822086, "grad_norm": 0.2704238295555115, "learning_rate": 2.0596199498720506e-05, "loss": 0.5607415437698364, "step": 9359 }, { "epoch": 11.484662576687116, "grad_norm": 0.3263934552669525, "learning_rate": 2.059125891219675e-05, "loss": 0.6650562286376953, "step": 9360 }, { "epoch": 11.485889570552148, "grad_norm": 0.34521597623825073, "learning_rate": 2.058631850337677e-05, "loss": 0.3624277114868164, "step": 9361 }, { "epoch": 11.487116564417178, "grad_norm": 0.2448592334985733, "learning_rate": 2.05813782724597e-05, "loss": 0.6554570198059082, "step": 9362 }, { "epoch": 11.488343558282208, "grad_norm": 0.30270808935165405, "learning_rate": 2.0576438219644668e-05, "loss": 0.4420592188835144, "step": 9363 }, { "epoch": 11.489570552147239, "grad_norm": 0.2795056700706482, "learning_rate": 2.0571498345130797e-05, "loss": 0.7561312913894653, "step": 9364 }, { "epoch": 11.49079754601227, "grad_norm": 0.26761579513549805, "learning_rate": 2.05665586491172e-05, "loss": 0.7725141048431396, "step": 9365 }, { "epoch": 11.4920245398773, "grad_norm": 0.2994218170642853, "learning_rate": 2.0561619131802977e-05, "loss": 0.7578492164611816, "step": 9366 }, { "epoch": 11.493251533742331, "grad_norm": 0.2558352053165436, "learning_rate": 2.0556679793387235e-05, "loss": 0.5819356441497803, "step": 9367 }, { "epoch": 11.494478527607361, "grad_norm": 0.26097217202186584, "learning_rate": 2.055174063406905e-05, "loss": 0.5726717114448547, "step": 9368 }, { "epoch": 11.495705521472393, "grad_norm": 0.29533684253692627, "learning_rate": 2.0546801654047512e-05, "loss": 0.6083731651306152, "step": 9369 }, { "epoch": 11.496932515337424, "grad_norm": 0.3112885355949402, "learning_rate": 2.0541862853521704e-05, "loss": 0.8101844787597656, "step": 9370 }, { "epoch": 11.498159509202454, "grad_norm": 0.308598130941391, "learning_rate": 2.0536924232690686e-05, "loss": 0.5685991048812866, "step": 9371 }, { "epoch": 11.499386503067484, "grad_norm": 0.34642359614372253, "learning_rate": 2.053198579175353e-05, "loss": 0.5280252695083618, "step": 9372 }, { "epoch": 11.500613496932516, "grad_norm": 0.2950928509235382, "learning_rate": 2.052704753090927e-05, "loss": 0.5793638229370117, "step": 9373 }, { "epoch": 11.501840490797546, "grad_norm": 0.2510690987110138, "learning_rate": 2.0522109450356975e-05, "loss": 0.6522173881530762, "step": 9374 }, { "epoch": 11.503067484662576, "grad_norm": 0.238764226436615, "learning_rate": 2.0517171550295677e-05, "loss": 0.7840215563774109, "step": 9375 }, { "epoch": 11.504294478527607, "grad_norm": 0.23306171596050262, "learning_rate": 2.0512233830924403e-05, "loss": 0.5031351447105408, "step": 9376 }, { "epoch": 11.505521472392639, "grad_norm": 0.26349931955337524, "learning_rate": 2.050729629244219e-05, "loss": 0.5233545303344727, "step": 9377 }, { "epoch": 11.506748466257669, "grad_norm": 0.2637661099433899, "learning_rate": 2.050235893504804e-05, "loss": 0.7353044152259827, "step": 9378 }, { "epoch": 11.5079754601227, "grad_norm": 0.2510242462158203, "learning_rate": 2.0497421758940984e-05, "loss": 0.5984749794006348, "step": 9379 }, { "epoch": 11.50920245398773, "grad_norm": 0.2744198441505432, "learning_rate": 2.049248476432001e-05, "loss": 0.7102622389793396, "step": 9380 }, { "epoch": 11.510429447852761, "grad_norm": 0.26685479283332825, "learning_rate": 2.0487547951384122e-05, "loss": 0.5843207836151123, "step": 9381 }, { "epoch": 11.511656441717792, "grad_norm": 0.27289533615112305, "learning_rate": 2.0482611320332305e-05, "loss": 0.5883883833885193, "step": 9382 }, { "epoch": 11.512883435582822, "grad_norm": 0.28434035181999207, "learning_rate": 2.0477674871363546e-05, "loss": 0.841903030872345, "step": 9383 }, { "epoch": 11.514110429447852, "grad_norm": 0.28151190280914307, "learning_rate": 2.0472738604676813e-05, "loss": 0.4928404688835144, "step": 9384 }, { "epoch": 11.515337423312884, "grad_norm": 0.23670370876789093, "learning_rate": 2.0467802520471073e-05, "loss": 0.7138738036155701, "step": 9385 }, { "epoch": 11.516564417177914, "grad_norm": 0.4034973382949829, "learning_rate": 2.0462866618945285e-05, "loss": 0.521658182144165, "step": 9386 }, { "epoch": 11.517791411042944, "grad_norm": 0.3060170114040375, "learning_rate": 2.045793090029841e-05, "loss": 0.5650781393051147, "step": 9387 }, { "epoch": 11.519018404907975, "grad_norm": 0.2865802049636841, "learning_rate": 2.0452995364729377e-05, "loss": 0.7655676007270813, "step": 9388 }, { "epoch": 11.520245398773007, "grad_norm": 0.29759857058525085, "learning_rate": 2.0448060012437143e-05, "loss": 0.5532960295677185, "step": 9389 }, { "epoch": 11.521472392638037, "grad_norm": 0.37333324551582336, "learning_rate": 2.0443124843620617e-05, "loss": 0.5561946630477905, "step": 9390 }, { "epoch": 11.522699386503067, "grad_norm": 0.2919309437274933, "learning_rate": 2.0438189858478734e-05, "loss": 0.38188934326171875, "step": 9391 }, { "epoch": 11.523926380368097, "grad_norm": 0.3072443902492523, "learning_rate": 2.0433255057210414e-05, "loss": 0.471201092004776, "step": 9392 }, { "epoch": 11.52515337423313, "grad_norm": 0.3813095688819885, "learning_rate": 2.0428320440014555e-05, "loss": 0.5484168529510498, "step": 9393 }, { "epoch": 11.52638036809816, "grad_norm": 0.253089040517807, "learning_rate": 2.042338600709006e-05, "loss": 0.6214956045150757, "step": 9394 }, { "epoch": 11.52760736196319, "grad_norm": 0.3038380444049835, "learning_rate": 2.0418451758635817e-05, "loss": 0.4359402358531952, "step": 9395 }, { "epoch": 11.52883435582822, "grad_norm": 0.30760642886161804, "learning_rate": 2.041351769485072e-05, "loss": 0.5862472057342529, "step": 9396 }, { "epoch": 11.530061349693252, "grad_norm": 0.809357225894928, "learning_rate": 2.0408583815933645e-05, "loss": 0.5387735366821289, "step": 9397 }, { "epoch": 11.531288343558282, "grad_norm": 0.29692089557647705, "learning_rate": 2.040365012208346e-05, "loss": 0.7863813638687134, "step": 9398 }, { "epoch": 11.532515337423312, "grad_norm": 0.3128008544445038, "learning_rate": 2.0398716613499034e-05, "loss": 0.27055537700653076, "step": 9399 }, { "epoch": 11.533742331288344, "grad_norm": 0.39796414971351624, "learning_rate": 2.03937832903792e-05, "loss": 0.498249888420105, "step": 9400 }, { "epoch": 11.534969325153375, "grad_norm": 0.321662962436676, "learning_rate": 2.038885015292284e-05, "loss": 0.5111514329910278, "step": 9401 }, { "epoch": 11.536196319018405, "grad_norm": 0.34058302640914917, "learning_rate": 2.0383917201328777e-05, "loss": 0.5980585813522339, "step": 9402 }, { "epoch": 11.537423312883435, "grad_norm": 0.2884189188480377, "learning_rate": 2.037898443579585e-05, "loss": 0.6672163605690002, "step": 9403 }, { "epoch": 11.538650306748465, "grad_norm": 0.3620434105396271, "learning_rate": 2.0374051856522875e-05, "loss": 0.3795883059501648, "step": 9404 }, { "epoch": 11.539877300613497, "grad_norm": 0.2566353678703308, "learning_rate": 2.0369119463708675e-05, "loss": 0.6739077568054199, "step": 9405 }, { "epoch": 11.541104294478528, "grad_norm": 0.25380539894104004, "learning_rate": 2.036418725755207e-05, "loss": 0.596896231174469, "step": 9406 }, { "epoch": 11.542331288343558, "grad_norm": 0.26162490248680115, "learning_rate": 2.0359255238251845e-05, "loss": 0.7358465194702148, "step": 9407 }, { "epoch": 11.54355828220859, "grad_norm": 0.26651403307914734, "learning_rate": 2.0354323406006814e-05, "loss": 0.7024576663970947, "step": 9408 }, { "epoch": 11.54478527607362, "grad_norm": 0.3321142792701721, "learning_rate": 2.0349391761015753e-05, "loss": 0.5952036380767822, "step": 9409 }, { "epoch": 11.54601226993865, "grad_norm": 0.36566194891929626, "learning_rate": 2.0344460303477443e-05, "loss": 0.5932924747467041, "step": 9410 }, { "epoch": 11.54723926380368, "grad_norm": 0.2789744436740875, "learning_rate": 2.0339529033590664e-05, "loss": 0.7972404956817627, "step": 9411 }, { "epoch": 11.548466257668712, "grad_norm": 0.29079049825668335, "learning_rate": 2.0334597951554176e-05, "loss": 0.3719482123851776, "step": 9412 }, { "epoch": 11.549693251533743, "grad_norm": 0.27248382568359375, "learning_rate": 2.0329667057566744e-05, "loss": 0.6171004772186279, "step": 9413 }, { "epoch": 11.550920245398773, "grad_norm": 0.3182624280452728, "learning_rate": 2.0324736351827105e-05, "loss": 0.5079494714736938, "step": 9414 }, { "epoch": 11.552147239263803, "grad_norm": 0.2820869982242584, "learning_rate": 2.0319805834534012e-05, "loss": 0.732227087020874, "step": 9415 }, { "epoch": 11.553374233128835, "grad_norm": 0.264681875705719, "learning_rate": 2.03148755058862e-05, "loss": 0.767892062664032, "step": 9416 }, { "epoch": 11.554601226993865, "grad_norm": 0.2695039212703705, "learning_rate": 2.0309945366082388e-05, "loss": 0.6887350678443909, "step": 9417 }, { "epoch": 11.555828220858896, "grad_norm": 0.2628953456878662, "learning_rate": 2.030501541532131e-05, "loss": 0.5827364325523376, "step": 9418 }, { "epoch": 11.557055214723926, "grad_norm": 0.2785290777683258, "learning_rate": 2.0300085653801664e-05, "loss": 0.8928682208061218, "step": 9419 }, { "epoch": 11.558282208588958, "grad_norm": 0.24695786833763123, "learning_rate": 2.029515608172216e-05, "loss": 0.8725056648254395, "step": 9420 }, { "epoch": 11.559509202453988, "grad_norm": 0.2894009053707123, "learning_rate": 2.02902266992815e-05, "loss": 0.5339688062667847, "step": 9421 }, { "epoch": 11.560736196319018, "grad_norm": 0.2718128263950348, "learning_rate": 2.028529750667836e-05, "loss": 0.6160972714424133, "step": 9422 }, { "epoch": 11.561963190184048, "grad_norm": 0.3313506841659546, "learning_rate": 2.0280368504111434e-05, "loss": 0.44851404428482056, "step": 9423 }, { "epoch": 11.56319018404908, "grad_norm": 0.2723330557346344, "learning_rate": 2.0275439691779396e-05, "loss": 0.7232380509376526, "step": 9424 }, { "epoch": 11.56441717791411, "grad_norm": 0.30064597725868225, "learning_rate": 2.0270511069880915e-05, "loss": 0.5682557225227356, "step": 9425 }, { "epoch": 11.565644171779141, "grad_norm": 0.34825941920280457, "learning_rate": 2.0265582638614635e-05, "loss": 0.5044159889221191, "step": 9426 }, { "epoch": 11.566871165644171, "grad_norm": 0.2418493628501892, "learning_rate": 2.0260654398179223e-05, "loss": 0.7095415592193604, "step": 9427 }, { "epoch": 11.568098159509203, "grad_norm": 0.27349311113357544, "learning_rate": 2.025572634877331e-05, "loss": 0.8009089231491089, "step": 9428 }, { "epoch": 11.569325153374233, "grad_norm": 0.27373290061950684, "learning_rate": 2.0250798490595534e-05, "loss": 0.6081798076629639, "step": 9429 }, { "epoch": 11.570552147239264, "grad_norm": 0.3226366937160492, "learning_rate": 2.0245870823844536e-05, "loss": 0.7386517524719238, "step": 9430 }, { "epoch": 11.571779141104294, "grad_norm": 0.34610721468925476, "learning_rate": 2.0240943348718916e-05, "loss": 0.3887348771095276, "step": 9431 }, { "epoch": 11.573006134969326, "grad_norm": 0.3154914379119873, "learning_rate": 2.02360160654173e-05, "loss": 0.6280910968780518, "step": 9432 }, { "epoch": 11.574233128834356, "grad_norm": 0.33398348093032837, "learning_rate": 2.0231088974138292e-05, "loss": 0.5135679244995117, "step": 9433 }, { "epoch": 11.575460122699386, "grad_norm": 0.30095967650413513, "learning_rate": 2.022616207508048e-05, "loss": 0.6187642812728882, "step": 9434 }, { "epoch": 11.576687116564417, "grad_norm": 0.2396293729543686, "learning_rate": 2.0221235368442467e-05, "loss": 0.5766950845718384, "step": 9435 }, { "epoch": 11.577914110429449, "grad_norm": 0.2907046377658844, "learning_rate": 2.0216308854422822e-05, "loss": 0.4932151436805725, "step": 9436 }, { "epoch": 11.579141104294479, "grad_norm": 0.25163576006889343, "learning_rate": 2.021138253322012e-05, "loss": 0.5383325219154358, "step": 9437 }, { "epoch": 11.580368098159509, "grad_norm": 0.23944438993930817, "learning_rate": 2.020645640503294e-05, "loss": 0.5389879941940308, "step": 9438 }, { "epoch": 11.58159509202454, "grad_norm": 0.23444786667823792, "learning_rate": 2.0201530470059824e-05, "loss": 0.5172325968742371, "step": 9439 }, { "epoch": 11.582822085889571, "grad_norm": 0.2937139570713043, "learning_rate": 2.0196604728499336e-05, "loss": 0.5537399053573608, "step": 9440 }, { "epoch": 11.584049079754601, "grad_norm": 0.2980753779411316, "learning_rate": 2.0191679180550003e-05, "loss": 0.7488588690757751, "step": 9441 }, { "epoch": 11.585276073619632, "grad_norm": 0.3058694005012512, "learning_rate": 2.018675382641037e-05, "loss": 0.7228178977966309, "step": 9442 }, { "epoch": 11.586503067484662, "grad_norm": 0.25299134850502014, "learning_rate": 2.018182866627897e-05, "loss": 0.5294713973999023, "step": 9443 }, { "epoch": 11.587730061349694, "grad_norm": 0.27510926127433777, "learning_rate": 2.01769037003543e-05, "loss": 0.5511665344238281, "step": 9444 }, { "epoch": 11.588957055214724, "grad_norm": 0.3245936632156372, "learning_rate": 2.017197892883489e-05, "loss": 0.6281130313873291, "step": 9445 }, { "epoch": 11.590184049079754, "grad_norm": 0.3020489811897278, "learning_rate": 2.016705435191924e-05, "loss": 0.8185979127883911, "step": 9446 }, { "epoch": 11.591411042944785, "grad_norm": 0.28306809067726135, "learning_rate": 2.0162129969805854e-05, "loss": 0.5360640287399292, "step": 9447 }, { "epoch": 11.592638036809817, "grad_norm": 0.25161710381507874, "learning_rate": 2.0157205782693207e-05, "loss": 0.7686951160430908, "step": 9448 }, { "epoch": 11.593865030674847, "grad_norm": 0.351236492395401, "learning_rate": 2.0152281790779783e-05, "loss": 0.5159231424331665, "step": 9449 }, { "epoch": 11.595092024539877, "grad_norm": 0.21305429935455322, "learning_rate": 2.014735799426405e-05, "loss": 0.6355855464935303, "step": 9450 }, { "epoch": 11.596319018404907, "grad_norm": 0.29463082551956177, "learning_rate": 2.0142434393344477e-05, "loss": 0.6257409453392029, "step": 9451 }, { "epoch": 11.59754601226994, "grad_norm": 0.2661876678466797, "learning_rate": 2.013751098821953e-05, "loss": 0.4818795323371887, "step": 9452 }, { "epoch": 11.59877300613497, "grad_norm": 0.2176758497953415, "learning_rate": 2.0132587779087632e-05, "loss": 0.6362789869308472, "step": 9453 }, { "epoch": 11.6, "grad_norm": 0.33325913548469543, "learning_rate": 2.012766476614725e-05, "loss": 0.7014572024345398, "step": 9454 }, { "epoch": 11.60122699386503, "grad_norm": 0.2957634925842285, "learning_rate": 2.0122741949596797e-05, "loss": 0.5561391115188599, "step": 9455 }, { "epoch": 11.602453987730062, "grad_norm": 0.3479072153568268, "learning_rate": 2.0117819329634704e-05, "loss": 0.5768741965293884, "step": 9456 }, { "epoch": 11.603680981595092, "grad_norm": 0.36134418845176697, "learning_rate": 2.0112896906459397e-05, "loss": 0.576228141784668, "step": 9457 }, { "epoch": 11.604907975460122, "grad_norm": 0.28439006209373474, "learning_rate": 2.0107974680269274e-05, "loss": 0.6052560210227966, "step": 9458 }, { "epoch": 11.606134969325154, "grad_norm": 0.317222535610199, "learning_rate": 2.010305265126274e-05, "loss": 0.4590951204299927, "step": 9459 }, { "epoch": 11.607361963190185, "grad_norm": 0.2765842080116272, "learning_rate": 2.0098130819638183e-05, "loss": 0.7151995897293091, "step": 9460 }, { "epoch": 11.608588957055215, "grad_norm": 0.2869534492492676, "learning_rate": 2.0093209185593996e-05, "loss": 0.6493178009986877, "step": 9461 }, { "epoch": 11.609815950920245, "grad_norm": 0.29396727681159973, "learning_rate": 2.0088287749328554e-05, "loss": 0.6348223686218262, "step": 9462 }, { "epoch": 11.611042944785275, "grad_norm": 0.3436482846736908, "learning_rate": 2.008336651104022e-05, "loss": 0.6892231702804565, "step": 9463 }, { "epoch": 11.612269938650307, "grad_norm": 0.2582024931907654, "learning_rate": 2.007844547092736e-05, "loss": 0.6404186487197876, "step": 9464 }, { "epoch": 11.613496932515337, "grad_norm": 0.31951993703842163, "learning_rate": 2.0073524629188327e-05, "loss": 0.3833562731742859, "step": 9465 }, { "epoch": 11.614723926380368, "grad_norm": 0.2899370789527893, "learning_rate": 2.0068603986021463e-05, "loss": 0.536142885684967, "step": 9466 }, { "epoch": 11.6159509202454, "grad_norm": 0.27081987261772156, "learning_rate": 2.0063683541625104e-05, "loss": 0.4961429834365845, "step": 9467 }, { "epoch": 11.61717791411043, "grad_norm": 0.25992289185523987, "learning_rate": 2.005876329619759e-05, "loss": 0.6269103288650513, "step": 9468 }, { "epoch": 11.61840490797546, "grad_norm": 0.2989911139011383, "learning_rate": 2.0053843249937232e-05, "loss": 0.5793657302856445, "step": 9469 }, { "epoch": 11.61963190184049, "grad_norm": 0.34804943203926086, "learning_rate": 2.0048923403042346e-05, "loss": 0.4973931312561035, "step": 9470 }, { "epoch": 11.62085889570552, "grad_norm": 0.2474181354045868, "learning_rate": 2.004400375571124e-05, "loss": 0.673668622970581, "step": 9471 }, { "epoch": 11.622085889570553, "grad_norm": 0.305021733045578, "learning_rate": 2.0039084308142203e-05, "loss": 0.8168718814849854, "step": 9472 }, { "epoch": 11.623312883435583, "grad_norm": 0.27990415692329407, "learning_rate": 2.0034165060533534e-05, "loss": 0.5184601545333862, "step": 9473 }, { "epoch": 11.624539877300613, "grad_norm": 0.24596722424030304, "learning_rate": 2.0029246013083505e-05, "loss": 0.7679339647293091, "step": 9474 }, { "epoch": 11.625766871165645, "grad_norm": 0.2480127215385437, "learning_rate": 2.0024327165990388e-05, "loss": 0.48435407876968384, "step": 9475 }, { "epoch": 11.626993865030675, "grad_norm": 0.319297194480896, "learning_rate": 2.001940851945246e-05, "loss": 0.6504848599433899, "step": 9476 }, { "epoch": 11.628220858895705, "grad_norm": 0.2646480202674866, "learning_rate": 2.0014490073667965e-05, "loss": 0.5176653265953064, "step": 9477 }, { "epoch": 11.629447852760736, "grad_norm": 0.31891748309135437, "learning_rate": 2.0009571828835162e-05, "loss": 0.6191372275352478, "step": 9478 }, { "epoch": 11.630674846625768, "grad_norm": 0.27417290210723877, "learning_rate": 2.0004653785152278e-05, "loss": 0.6458055973052979, "step": 9479 }, { "epoch": 11.631901840490798, "grad_norm": 0.28792497515678406, "learning_rate": 1.999973594281755e-05, "loss": 0.6352684497833252, "step": 9480 }, { "epoch": 11.633128834355828, "grad_norm": 0.2923356294631958, "learning_rate": 1.9994818302029218e-05, "loss": 0.6065963506698608, "step": 9481 }, { "epoch": 11.634355828220858, "grad_norm": 0.34754806756973267, "learning_rate": 1.998990086298547e-05, "loss": 0.5647860765457153, "step": 9482 }, { "epoch": 11.63558282208589, "grad_norm": 0.2491975724697113, "learning_rate": 1.9984983625884536e-05, "loss": 0.5652453303337097, "step": 9483 }, { "epoch": 11.63680981595092, "grad_norm": 0.30858731269836426, "learning_rate": 1.9980066590924613e-05, "loss": 0.48818880319595337, "step": 9484 }, { "epoch": 11.63803680981595, "grad_norm": 0.23447836935520172, "learning_rate": 1.9975149758303883e-05, "loss": 0.6104415655136108, "step": 9485 }, { "epoch": 11.639263803680981, "grad_norm": 0.2574448883533478, "learning_rate": 1.9970233128220538e-05, "loss": 0.48904162645339966, "step": 9486 }, { "epoch": 11.640490797546013, "grad_norm": 0.28359365463256836, "learning_rate": 1.9965316700872747e-05, "loss": 0.5265355110168457, "step": 9487 }, { "epoch": 11.641717791411043, "grad_norm": 0.31354570388793945, "learning_rate": 1.996040047645868e-05, "loss": 0.4511997103691101, "step": 9488 }, { "epoch": 11.642944785276073, "grad_norm": 0.2963505983352661, "learning_rate": 1.995548445517649e-05, "loss": 0.6012418270111084, "step": 9489 }, { "epoch": 11.644171779141104, "grad_norm": 0.279703825712204, "learning_rate": 1.9950568637224344e-05, "loss": 0.5824288129806519, "step": 9490 }, { "epoch": 11.645398773006136, "grad_norm": 0.25844860076904297, "learning_rate": 1.9945653022800374e-05, "loss": 0.778069257736206, "step": 9491 }, { "epoch": 11.646625766871166, "grad_norm": 0.3883349895477295, "learning_rate": 1.994073761210271e-05, "loss": 0.6040104627609253, "step": 9492 }, { "epoch": 11.647852760736196, "grad_norm": 0.2978866994380951, "learning_rate": 1.9935822405329494e-05, "loss": 0.5667875409126282, "step": 9493 }, { "epoch": 11.649079754601226, "grad_norm": 0.2663370966911316, "learning_rate": 1.993090740267882e-05, "loss": 0.683660626411438, "step": 9494 }, { "epoch": 11.650306748466258, "grad_norm": 0.3150114119052887, "learning_rate": 1.9925992604348826e-05, "loss": 0.12364647537469864, "step": 9495 }, { "epoch": 11.651533742331289, "grad_norm": 0.2982626259326935, "learning_rate": 1.992107801053759e-05, "loss": 0.4423064589500427, "step": 9496 }, { "epoch": 11.652760736196319, "grad_norm": 0.29965320229530334, "learning_rate": 1.991616362144321e-05, "loss": 0.6903239488601685, "step": 9497 }, { "epoch": 11.653987730061349, "grad_norm": 0.2923734784126282, "learning_rate": 1.9911249437263785e-05, "loss": 0.7748324871063232, "step": 9498 }, { "epoch": 11.655214723926381, "grad_norm": 0.28695565462112427, "learning_rate": 1.9906335458197372e-05, "loss": 0.6528921127319336, "step": 9499 }, { "epoch": 11.656441717791411, "grad_norm": 0.26240161061286926, "learning_rate": 1.9901421684442056e-05, "loss": 0.539938747882843, "step": 9500 }, { "epoch": 11.657668711656441, "grad_norm": 0.2675973176956177, "learning_rate": 1.9896508116195885e-05, "loss": 0.5252756476402283, "step": 9501 }, { "epoch": 11.658895705521472, "grad_norm": 0.25544479489326477, "learning_rate": 1.9891594753656917e-05, "loss": 0.8239535093307495, "step": 9502 }, { "epoch": 11.660122699386504, "grad_norm": 0.2603147327899933, "learning_rate": 1.9886681597023203e-05, "loss": 0.6712019443511963, "step": 9503 }, { "epoch": 11.661349693251534, "grad_norm": 0.31161999702453613, "learning_rate": 1.988176864649276e-05, "loss": 0.5665696263313293, "step": 9504 }, { "epoch": 11.662576687116564, "grad_norm": 0.3109508752822876, "learning_rate": 1.9876855902263632e-05, "loss": 0.5757704377174377, "step": 9505 }, { "epoch": 11.663803680981594, "grad_norm": 0.35030102729797363, "learning_rate": 1.9871943364533825e-05, "loss": 0.5539990663528442, "step": 9506 }, { "epoch": 11.665030674846626, "grad_norm": 0.3554724454879761, "learning_rate": 1.986703103350136e-05, "loss": 0.3767274022102356, "step": 9507 }, { "epoch": 11.666257668711657, "grad_norm": 0.2587440609931946, "learning_rate": 1.986211890936423e-05, "loss": 0.633133053779602, "step": 9508 }, { "epoch": 11.667484662576687, "grad_norm": 0.2905822694301605, "learning_rate": 1.9857206992320436e-05, "loss": 0.6847652196884155, "step": 9509 }, { "epoch": 11.668711656441717, "grad_norm": 0.23062120378017426, "learning_rate": 1.9852295282567964e-05, "loss": 0.4706522822380066, "step": 9510 }, { "epoch": 11.669938650306749, "grad_norm": 0.2923782169818878, "learning_rate": 1.9847383780304772e-05, "loss": 0.6719933152198792, "step": 9511 }, { "epoch": 11.67116564417178, "grad_norm": 0.33204907178878784, "learning_rate": 1.9842472485728857e-05, "loss": 0.5435299873352051, "step": 9512 }, { "epoch": 11.67239263803681, "grad_norm": 0.2551460564136505, "learning_rate": 1.9837561399038165e-05, "loss": 0.5980784893035889, "step": 9513 }, { "epoch": 11.67361963190184, "grad_norm": 0.2595767080783844, "learning_rate": 1.983265052043065e-05, "loss": 0.6109302639961243, "step": 9514 }, { "epoch": 11.674846625766872, "grad_norm": 0.2935793697834015, "learning_rate": 1.9827739850104255e-05, "loss": 0.6193510293960571, "step": 9515 }, { "epoch": 11.676073619631902, "grad_norm": 0.35778191685676575, "learning_rate": 1.982282938825691e-05, "loss": 0.5696227550506592, "step": 9516 }, { "epoch": 11.677300613496932, "grad_norm": 0.23331041634082794, "learning_rate": 1.9817919135086553e-05, "loss": 0.8528544902801514, "step": 9517 }, { "epoch": 11.678527607361962, "grad_norm": 0.3206521272659302, "learning_rate": 1.9813009090791093e-05, "loss": 0.6627597808837891, "step": 9518 }, { "epoch": 11.679754601226994, "grad_norm": 0.31622326374053955, "learning_rate": 1.9808099255568446e-05, "loss": 0.5091391801834106, "step": 9519 }, { "epoch": 11.680981595092025, "grad_norm": 0.31928014755249023, "learning_rate": 1.9803189629616507e-05, "loss": 0.6110356450080872, "step": 9520 }, { "epoch": 11.682208588957055, "grad_norm": 0.30546432733535767, "learning_rate": 1.979828021313317e-05, "loss": 0.5524085164070129, "step": 9521 }, { "epoch": 11.683435582822085, "grad_norm": 0.26944562792778015, "learning_rate": 1.9793371006316328e-05, "loss": 0.7613790035247803, "step": 9522 }, { "epoch": 11.684662576687117, "grad_norm": 0.3123566210269928, "learning_rate": 1.9788462009363846e-05, "loss": 0.6802759170532227, "step": 9523 }, { "epoch": 11.685889570552147, "grad_norm": 1.5134332180023193, "learning_rate": 1.97835532224736e-05, "loss": 0.7222217321395874, "step": 9524 }, { "epoch": 11.687116564417177, "grad_norm": 0.2749325931072235, "learning_rate": 1.9778644645843443e-05, "loss": 0.8159297704696655, "step": 9525 }, { "epoch": 11.68834355828221, "grad_norm": 0.278688907623291, "learning_rate": 1.977373627967123e-05, "loss": 0.5833026170730591, "step": 9526 }, { "epoch": 11.68957055214724, "grad_norm": 0.26019254326820374, "learning_rate": 1.9768828124154802e-05, "loss": 0.5906069278717041, "step": 9527 }, { "epoch": 11.69079754601227, "grad_norm": 0.23459291458129883, "learning_rate": 1.976392017949199e-05, "loss": 0.7198741436004639, "step": 9528 }, { "epoch": 11.6920245398773, "grad_norm": 0.2992471754550934, "learning_rate": 1.9759012445880628e-05, "loss": 0.7554330825805664, "step": 9529 }, { "epoch": 11.69325153374233, "grad_norm": 0.25255316495895386, "learning_rate": 1.9754104923518517e-05, "loss": 0.5726245641708374, "step": 9530 }, { "epoch": 11.694478527607362, "grad_norm": 0.2971387207508087, "learning_rate": 1.9749197612603483e-05, "loss": 0.32727742195129395, "step": 9531 }, { "epoch": 11.695705521472393, "grad_norm": 0.25416770577430725, "learning_rate": 1.9744290513333316e-05, "loss": 0.7792820930480957, "step": 9532 }, { "epoch": 11.696932515337423, "grad_norm": 0.27302858233451843, "learning_rate": 1.97393836259058e-05, "loss": 0.7817479968070984, "step": 9533 }, { "epoch": 11.698159509202455, "grad_norm": 0.2784537374973297, "learning_rate": 1.9734476950518733e-05, "loss": 0.6348985433578491, "step": 9534 }, { "epoch": 11.699386503067485, "grad_norm": 0.28280705213546753, "learning_rate": 1.972957048736988e-05, "loss": 0.605053186416626, "step": 9535 }, { "epoch": 11.700613496932515, "grad_norm": 0.276436448097229, "learning_rate": 1.9724664236657013e-05, "loss": 0.7897372245788574, "step": 9536 }, { "epoch": 11.701840490797546, "grad_norm": 0.2718936502933502, "learning_rate": 1.971975819857788e-05, "loss": 0.6245383620262146, "step": 9537 }, { "epoch": 11.703067484662578, "grad_norm": 0.24464534223079681, "learning_rate": 1.9714852373330238e-05, "loss": 0.7959284782409668, "step": 9538 }, { "epoch": 11.704294478527608, "grad_norm": 0.22548052668571472, "learning_rate": 1.9709946761111827e-05, "loss": 0.577168881893158, "step": 9539 }, { "epoch": 11.705521472392638, "grad_norm": 0.27899491786956787, "learning_rate": 1.970504136212037e-05, "loss": 0.6920242309570312, "step": 9540 }, { "epoch": 11.706748466257668, "grad_norm": 0.27078232169151306, "learning_rate": 1.9700136176553596e-05, "loss": 0.7984822988510132, "step": 9541 }, { "epoch": 11.7079754601227, "grad_norm": 0.2863551676273346, "learning_rate": 1.9695231204609213e-05, "loss": 0.6827613711357117, "step": 9542 }, { "epoch": 11.70920245398773, "grad_norm": 0.27367204427719116, "learning_rate": 1.969032644648493e-05, "loss": 0.6443747282028198, "step": 9543 }, { "epoch": 11.71042944785276, "grad_norm": 0.3223135769367218, "learning_rate": 1.968542190237845e-05, "loss": 0.7713146209716797, "step": 9544 }, { "epoch": 11.71165644171779, "grad_norm": 0.29863640666007996, "learning_rate": 1.9680517572487454e-05, "loss": 0.8792384266853333, "step": 9545 }, { "epoch": 11.712883435582823, "grad_norm": 0.25816255807876587, "learning_rate": 1.9675613457009624e-05, "loss": 0.8757304549217224, "step": 9546 }, { "epoch": 11.714110429447853, "grad_norm": 0.2810089588165283, "learning_rate": 1.9670709556142625e-05, "loss": 0.401310533285141, "step": 9547 }, { "epoch": 11.715337423312883, "grad_norm": 0.31375348567962646, "learning_rate": 1.9665805870084127e-05, "loss": 0.7530019283294678, "step": 9548 }, { "epoch": 11.716564417177914, "grad_norm": 0.2841877341270447, "learning_rate": 1.9660902399031782e-05, "loss": 0.7391561269760132, "step": 9549 }, { "epoch": 11.717791411042946, "grad_norm": 0.27511274814605713, "learning_rate": 1.965599914318323e-05, "loss": 0.559120774269104, "step": 9550 }, { "epoch": 11.719018404907976, "grad_norm": 0.24164848029613495, "learning_rate": 1.9651096102736116e-05, "loss": 0.6302900314331055, "step": 9551 }, { "epoch": 11.720245398773006, "grad_norm": 0.2878934442996979, "learning_rate": 1.964619327788806e-05, "loss": 0.460774689912796, "step": 9552 }, { "epoch": 11.721472392638036, "grad_norm": 0.280567467212677, "learning_rate": 1.964129066883668e-05, "loss": 0.5491126179695129, "step": 9553 }, { "epoch": 11.722699386503068, "grad_norm": 0.23726464807987213, "learning_rate": 1.9636388275779597e-05, "loss": 0.8931014537811279, "step": 9554 }, { "epoch": 11.723926380368098, "grad_norm": 0.2800522744655609, "learning_rate": 1.96314860989144e-05, "loss": 0.7249869704246521, "step": 9555 }, { "epoch": 11.725153374233129, "grad_norm": 0.3555373549461365, "learning_rate": 1.9626584138438678e-05, "loss": 0.5293261408805847, "step": 9556 }, { "epoch": 11.726380368098159, "grad_norm": 0.22667278349399567, "learning_rate": 1.962168239455003e-05, "loss": 0.7842793464660645, "step": 9557 }, { "epoch": 11.72760736196319, "grad_norm": 0.2850303649902344, "learning_rate": 1.961678086744603e-05, "loss": 0.6098488569259644, "step": 9558 }, { "epoch": 11.728834355828221, "grad_norm": 0.25231656432151794, "learning_rate": 1.961187955732424e-05, "loss": 0.677904486656189, "step": 9559 }, { "epoch": 11.730061349693251, "grad_norm": 0.3008352518081665, "learning_rate": 1.960697846438222e-05, "loss": 0.648263692855835, "step": 9560 }, { "epoch": 11.731288343558282, "grad_norm": 0.2854204773902893, "learning_rate": 1.9602077588817515e-05, "loss": 0.6694426536560059, "step": 9561 }, { "epoch": 11.732515337423314, "grad_norm": 0.29160967469215393, "learning_rate": 1.959717693082766e-05, "loss": 0.6921350955963135, "step": 9562 }, { "epoch": 11.733742331288344, "grad_norm": 0.2743208706378937, "learning_rate": 1.95922764906102e-05, "loss": 0.679353654384613, "step": 9563 }, { "epoch": 11.734969325153374, "grad_norm": 0.2936701476573944, "learning_rate": 1.958737626836265e-05, "loss": 0.6820969581604004, "step": 9564 }, { "epoch": 11.736196319018404, "grad_norm": 0.24143485724925995, "learning_rate": 1.9582476264282528e-05, "loss": 0.5923463106155396, "step": 9565 }, { "epoch": 11.737423312883436, "grad_norm": 0.2324918955564499, "learning_rate": 1.957757647856733e-05, "loss": 0.6554230451583862, "step": 9566 }, { "epoch": 11.738650306748466, "grad_norm": 0.2540196180343628, "learning_rate": 1.9572676911414564e-05, "loss": 0.6194184422492981, "step": 9567 }, { "epoch": 11.739877300613497, "grad_norm": 0.2737759053707123, "learning_rate": 1.9567777563021715e-05, "loss": 0.560164213180542, "step": 9568 }, { "epoch": 11.741104294478527, "grad_norm": 0.2664704918861389, "learning_rate": 1.9562878433586254e-05, "loss": 0.7046476602554321, "step": 9569 }, { "epoch": 11.742331288343559, "grad_norm": 0.27176526188850403, "learning_rate": 1.955797952330566e-05, "loss": 0.6189674139022827, "step": 9570 }, { "epoch": 11.743558282208589, "grad_norm": 0.2511366605758667, "learning_rate": 1.9553080832377386e-05, "loss": 0.7684981226921082, "step": 9571 }, { "epoch": 11.74478527607362, "grad_norm": 0.31299206614494324, "learning_rate": 1.954818236099889e-05, "loss": 0.5276795029640198, "step": 9572 }, { "epoch": 11.74601226993865, "grad_norm": 0.2349231243133545, "learning_rate": 1.9543284109367616e-05, "loss": 0.6548599004745483, "step": 9573 }, { "epoch": 11.747239263803682, "grad_norm": 0.31056779623031616, "learning_rate": 1.9538386077680996e-05, "loss": 0.5448459982872009, "step": 9574 }, { "epoch": 11.748466257668712, "grad_norm": 0.25658729672431946, "learning_rate": 1.9533488266136457e-05, "loss": 0.7039389610290527, "step": 9575 }, { "epoch": 11.749693251533742, "grad_norm": 0.28424862027168274, "learning_rate": 1.952859067493141e-05, "loss": 0.4354018568992615, "step": 9576 }, { "epoch": 11.750920245398772, "grad_norm": 0.21490901708602905, "learning_rate": 1.9523693304263272e-05, "loss": 0.5772041082382202, "step": 9577 }, { "epoch": 11.752147239263804, "grad_norm": 0.2935755252838135, "learning_rate": 1.9518796154329426e-05, "loss": 0.6677733659744263, "step": 9578 }, { "epoch": 11.753374233128834, "grad_norm": 0.3011758625507355, "learning_rate": 1.951389922532729e-05, "loss": 0.5572003722190857, "step": 9579 }, { "epoch": 11.754601226993865, "grad_norm": 0.27404549717903137, "learning_rate": 1.950900251745422e-05, "loss": 0.5751610398292542, "step": 9580 }, { "epoch": 11.755828220858895, "grad_norm": 0.35688239336013794, "learning_rate": 1.9504106030907604e-05, "loss": 0.6214532852172852, "step": 9581 }, { "epoch": 11.757055214723927, "grad_norm": 0.281511515378952, "learning_rate": 1.94992097658848e-05, "loss": 0.6674105525016785, "step": 9582 }, { "epoch": 11.758282208588957, "grad_norm": 0.26568925380706787, "learning_rate": 1.9494313722583156e-05, "loss": 0.6563834547996521, "step": 9583 }, { "epoch": 11.759509202453987, "grad_norm": 0.2649722099304199, "learning_rate": 1.9489417901200033e-05, "loss": 0.5728963613510132, "step": 9584 }, { "epoch": 11.76073619631902, "grad_norm": 0.264860600233078, "learning_rate": 1.9484522301932747e-05, "loss": 0.6471493244171143, "step": 9585 }, { "epoch": 11.76196319018405, "grad_norm": 0.2504892647266388, "learning_rate": 1.947962692497864e-05, "loss": 0.6854009628295898, "step": 9586 }, { "epoch": 11.76319018404908, "grad_norm": 0.3952023983001709, "learning_rate": 1.9474731770535033e-05, "loss": 0.5711678266525269, "step": 9587 }, { "epoch": 11.76441717791411, "grad_norm": 0.2542736232280731, "learning_rate": 1.946983683879922e-05, "loss": 0.6619305610656738, "step": 9588 }, { "epoch": 11.76564417177914, "grad_norm": 0.22710715234279633, "learning_rate": 1.9464942129968514e-05, "loss": 0.5482107400894165, "step": 9589 }, { "epoch": 11.766871165644172, "grad_norm": 0.29779502749443054, "learning_rate": 1.946004764424021e-05, "loss": 0.5091888904571533, "step": 9590 }, { "epoch": 11.768098159509202, "grad_norm": 0.2730807065963745, "learning_rate": 1.9455153381811578e-05, "loss": 0.6803940534591675, "step": 9591 }, { "epoch": 11.769325153374233, "grad_norm": 0.2866663634777069, "learning_rate": 1.9450259342879906e-05, "loss": 0.5809179544448853, "step": 9592 }, { "epoch": 11.770552147239265, "grad_norm": 0.2671222984790802, "learning_rate": 1.9445365527642447e-05, "loss": 0.6489564180374146, "step": 9593 }, { "epoch": 11.771779141104295, "grad_norm": 0.30492928624153137, "learning_rate": 1.9440471936296456e-05, "loss": 0.5565899610519409, "step": 9594 }, { "epoch": 11.773006134969325, "grad_norm": 0.25718042254447937, "learning_rate": 1.9435578569039194e-05, "loss": 0.6882932782173157, "step": 9595 }, { "epoch": 11.774233128834355, "grad_norm": 0.29672878980636597, "learning_rate": 1.9430685426067885e-05, "loss": 0.6158469915390015, "step": 9596 }, { "epoch": 11.775460122699386, "grad_norm": 0.24642036855220795, "learning_rate": 1.9425792507579768e-05, "loss": 0.6045433878898621, "step": 9597 }, { "epoch": 11.776687116564418, "grad_norm": 0.37042710185050964, "learning_rate": 1.942089981377205e-05, "loss": 0.4428582191467285, "step": 9598 }, { "epoch": 11.777914110429448, "grad_norm": 0.2620928883552551, "learning_rate": 1.941600734484195e-05, "loss": 0.6016508340835571, "step": 9599 }, { "epoch": 11.779141104294478, "grad_norm": 0.3262258768081665, "learning_rate": 1.941111510098666e-05, "loss": 0.6491349935531616, "step": 9600 }, { "epoch": 11.78036809815951, "grad_norm": 0.25819510221481323, "learning_rate": 1.9406223082403394e-05, "loss": 0.6551740169525146, "step": 9601 }, { "epoch": 11.78159509202454, "grad_norm": 0.27164044976234436, "learning_rate": 1.9401331289289312e-05, "loss": 0.7906754612922668, "step": 9602 }, { "epoch": 11.78282208588957, "grad_norm": 0.33813783526420593, "learning_rate": 1.9396439721841603e-05, "loss": 0.6731966733932495, "step": 9603 }, { "epoch": 11.7840490797546, "grad_norm": 0.24848490953445435, "learning_rate": 1.939154838025743e-05, "loss": 0.6606581211090088, "step": 9604 }, { "epoch": 11.785276073619633, "grad_norm": 0.30070072412490845, "learning_rate": 1.9386657264733938e-05, "loss": 0.6708749532699585, "step": 9605 }, { "epoch": 11.786503067484663, "grad_norm": 0.2944997251033783, "learning_rate": 1.938176637546829e-05, "loss": 0.4783540964126587, "step": 9606 }, { "epoch": 11.787730061349693, "grad_norm": 0.25931861996650696, "learning_rate": 1.937687571265761e-05, "loss": 0.5433894991874695, "step": 9607 }, { "epoch": 11.788957055214723, "grad_norm": 0.3210090398788452, "learning_rate": 1.9371985276499034e-05, "loss": 0.582573413848877, "step": 9608 }, { "epoch": 11.790184049079755, "grad_norm": 0.26524314284324646, "learning_rate": 1.9367095067189685e-05, "loss": 0.5848219394683838, "step": 9609 }, { "epoch": 11.791411042944786, "grad_norm": 0.3302682638168335, "learning_rate": 1.936220508492666e-05, "loss": 0.5784170627593994, "step": 9610 }, { "epoch": 11.792638036809816, "grad_norm": 0.2478141188621521, "learning_rate": 1.9357315329907074e-05, "loss": 0.7260841131210327, "step": 9611 }, { "epoch": 11.793865030674846, "grad_norm": 0.2359059751033783, "learning_rate": 1.935242580232801e-05, "loss": 0.7196325063705444, "step": 9612 }, { "epoch": 11.795092024539878, "grad_norm": 0.2881777286529541, "learning_rate": 1.9347536502386553e-05, "loss": 0.6869933605194092, "step": 9613 }, { "epoch": 11.796319018404908, "grad_norm": 0.26678168773651123, "learning_rate": 1.9342647430279786e-05, "loss": 0.7257282733917236, "step": 9614 }, { "epoch": 11.797546012269938, "grad_norm": 0.2230786234140396, "learning_rate": 1.933775858620476e-05, "loss": 0.5782450437545776, "step": 9615 }, { "epoch": 11.798773006134969, "grad_norm": 0.26259928941726685, "learning_rate": 1.933286997035854e-05, "loss": 0.7033650875091553, "step": 9616 }, { "epoch": 11.8, "grad_norm": 0.3691035509109497, "learning_rate": 1.9327981582938164e-05, "loss": 0.44239258766174316, "step": 9617 }, { "epoch": 11.801226993865031, "grad_norm": 0.2830300033092499, "learning_rate": 1.932309342414067e-05, "loss": 0.7731109261512756, "step": 9618 }, { "epoch": 11.802453987730061, "grad_norm": 0.24354392290115356, "learning_rate": 1.93182054941631e-05, "loss": 0.6245462894439697, "step": 9619 }, { "epoch": 11.803680981595091, "grad_norm": 0.21772722899913788, "learning_rate": 1.9313317793202455e-05, "loss": 0.6063433289527893, "step": 9620 }, { "epoch": 11.804907975460123, "grad_norm": 0.24257560074329376, "learning_rate": 1.9308430321455754e-05, "loss": 0.5727459192276001, "step": 9621 }, { "epoch": 11.806134969325154, "grad_norm": 0.233550027012825, "learning_rate": 1.930354307911998e-05, "loss": 0.6405918598175049, "step": 9622 }, { "epoch": 11.807361963190184, "grad_norm": 0.25465792417526245, "learning_rate": 1.9298656066392152e-05, "loss": 0.4967949092388153, "step": 9623 }, { "epoch": 11.808588957055214, "grad_norm": 0.2977510988712311, "learning_rate": 1.9293769283469232e-05, "loss": 0.5551639795303345, "step": 9624 }, { "epoch": 11.809815950920246, "grad_norm": 0.2927468717098236, "learning_rate": 1.9288882730548202e-05, "loss": 0.5464511513710022, "step": 9625 }, { "epoch": 11.811042944785276, "grad_norm": 0.31615352630615234, "learning_rate": 1.9283996407826014e-05, "loss": 0.27011457085609436, "step": 9626 }, { "epoch": 11.812269938650306, "grad_norm": 0.3130806088447571, "learning_rate": 1.927911031549963e-05, "loss": 0.4992949962615967, "step": 9627 }, { "epoch": 11.813496932515337, "grad_norm": 0.2643890380859375, "learning_rate": 1.9274224453766e-05, "loss": 0.5404645800590515, "step": 9628 }, { "epoch": 11.814723926380369, "grad_norm": 0.3151163160800934, "learning_rate": 1.9269338822822048e-05, "loss": 0.5644170641899109, "step": 9629 }, { "epoch": 11.815950920245399, "grad_norm": 0.31392449140548706, "learning_rate": 1.9264453422864705e-05, "loss": 0.45980799198150635, "step": 9630 }, { "epoch": 11.81717791411043, "grad_norm": 0.2396659106016159, "learning_rate": 1.925956825409088e-05, "loss": 0.7100940942764282, "step": 9631 }, { "epoch": 11.81840490797546, "grad_norm": 0.25081318616867065, "learning_rate": 1.925468331669749e-05, "loss": 0.6571832895278931, "step": 9632 }, { "epoch": 11.819631901840491, "grad_norm": 0.2754126489162445, "learning_rate": 1.9249798610881433e-05, "loss": 0.5207573771476746, "step": 9633 }, { "epoch": 11.820858895705522, "grad_norm": 0.32851165533065796, "learning_rate": 1.924491413683959e-05, "loss": 0.6046861410140991, "step": 9634 }, { "epoch": 11.822085889570552, "grad_norm": 0.2943875789642334, "learning_rate": 1.9240029894768842e-05, "loss": 0.7899008989334106, "step": 9635 }, { "epoch": 11.823312883435582, "grad_norm": 0.28850170969963074, "learning_rate": 1.9235145884866068e-05, "loss": 0.6068100929260254, "step": 9636 }, { "epoch": 11.824539877300614, "grad_norm": 0.25121229887008667, "learning_rate": 1.9230262107328115e-05, "loss": 0.7671653032302856, "step": 9637 }, { "epoch": 11.825766871165644, "grad_norm": 0.23775234818458557, "learning_rate": 1.922537856235185e-05, "loss": 0.7702902555465698, "step": 9638 }, { "epoch": 11.826993865030675, "grad_norm": 0.3494611382484436, "learning_rate": 1.9220495250134094e-05, "loss": 0.47882080078125, "step": 9639 }, { "epoch": 11.828220858895705, "grad_norm": 0.2880288362503052, "learning_rate": 1.921561217087169e-05, "loss": 0.7243320345878601, "step": 9640 }, { "epoch": 11.829447852760737, "grad_norm": 0.2799699902534485, "learning_rate": 1.9210729324761473e-05, "loss": 0.6183857917785645, "step": 9641 }, { "epoch": 11.830674846625767, "grad_norm": 0.29888293147087097, "learning_rate": 1.9205846712000236e-05, "loss": 0.9128921627998352, "step": 9642 }, { "epoch": 11.831901840490797, "grad_norm": 0.32036256790161133, "learning_rate": 1.9200964332784797e-05, "loss": 0.5190317630767822, "step": 9643 }, { "epoch": 11.833128834355827, "grad_norm": 0.300694078207016, "learning_rate": 1.9196082187311936e-05, "loss": 0.6500862836837769, "step": 9644 }, { "epoch": 11.83435582822086, "grad_norm": 0.2703522741794586, "learning_rate": 1.919120027577846e-05, "loss": 0.7028006315231323, "step": 9645 }, { "epoch": 11.83558282208589, "grad_norm": 0.24340300261974335, "learning_rate": 1.9186318598381132e-05, "loss": 0.6313120126724243, "step": 9646 }, { "epoch": 11.83680981595092, "grad_norm": 0.27604764699935913, "learning_rate": 1.9181437155316722e-05, "loss": 0.5635243058204651, "step": 9647 }, { "epoch": 11.83803680981595, "grad_norm": 0.2540474534034729, "learning_rate": 1.9176555946781982e-05, "loss": 0.588275671005249, "step": 9648 }, { "epoch": 11.839263803680982, "grad_norm": 0.2605104148387909, "learning_rate": 1.9171674972973662e-05, "loss": 0.8174633979797363, "step": 9649 }, { "epoch": 11.840490797546012, "grad_norm": 0.2521228492259979, "learning_rate": 1.9166794234088507e-05, "loss": 0.5654808282852173, "step": 9650 }, { "epoch": 11.841717791411043, "grad_norm": 0.2542824447154999, "learning_rate": 1.9161913730323234e-05, "loss": 0.6014357805252075, "step": 9651 }, { "epoch": 11.842944785276075, "grad_norm": 0.28966325521469116, "learning_rate": 1.915703346187457e-05, "loss": 0.7125221490859985, "step": 9652 }, { "epoch": 11.844171779141105, "grad_norm": 0.28652024269104004, "learning_rate": 1.915215342893922e-05, "loss": 0.47570300102233887, "step": 9653 }, { "epoch": 11.845398773006135, "grad_norm": 0.2586190104484558, "learning_rate": 1.9147273631713887e-05, "loss": 0.6280050277709961, "step": 9654 }, { "epoch": 11.846625766871165, "grad_norm": 0.22222229838371277, "learning_rate": 1.9142394070395267e-05, "loss": 0.8226921558380127, "step": 9655 }, { "epoch": 11.847852760736195, "grad_norm": 0.27901288866996765, "learning_rate": 1.913751474518003e-05, "loss": 0.6067864894866943, "step": 9656 }, { "epoch": 11.849079754601227, "grad_norm": 0.312906414270401, "learning_rate": 1.9132635656264857e-05, "loss": 0.7688937187194824, "step": 9657 }, { "epoch": 11.850306748466258, "grad_norm": 0.2635432183742523, "learning_rate": 1.9127756803846403e-05, "loss": 0.8130753636360168, "step": 9658 }, { "epoch": 11.851533742331288, "grad_norm": 0.25161951780319214, "learning_rate": 1.9122878188121324e-05, "loss": 0.5180455446243286, "step": 9659 }, { "epoch": 11.85276073619632, "grad_norm": 0.3220882713794708, "learning_rate": 1.9117999809286265e-05, "loss": 0.5363517999649048, "step": 9660 }, { "epoch": 11.85398773006135, "grad_norm": 0.2804092466831207, "learning_rate": 1.9113121667537856e-05, "loss": 0.5789508819580078, "step": 9661 }, { "epoch": 11.85521472392638, "grad_norm": 0.32632574439048767, "learning_rate": 1.910824376307273e-05, "loss": 0.590690016746521, "step": 9662 }, { "epoch": 11.85644171779141, "grad_norm": 0.3160863220691681, "learning_rate": 1.9103366096087483e-05, "loss": 0.5712788105010986, "step": 9663 }, { "epoch": 11.857668711656443, "grad_norm": 0.2837040424346924, "learning_rate": 1.9098488666778736e-05, "loss": 0.604761004447937, "step": 9664 }, { "epoch": 11.858895705521473, "grad_norm": 0.23892362415790558, "learning_rate": 1.9093611475343082e-05, "loss": 0.7150213718414307, "step": 9665 }, { "epoch": 11.860122699386503, "grad_norm": 0.3259752690792084, "learning_rate": 1.9088734521977097e-05, "loss": 0.47242140769958496, "step": 9666 }, { "epoch": 11.861349693251533, "grad_norm": 0.2696831524372101, "learning_rate": 1.9083857806877363e-05, "loss": 0.7774763703346252, "step": 9667 }, { "epoch": 11.862576687116565, "grad_norm": 0.2381206601858139, "learning_rate": 1.907898133024045e-05, "loss": 0.5491843223571777, "step": 9668 }, { "epoch": 11.863803680981595, "grad_norm": 0.2124134600162506, "learning_rate": 1.9074105092262916e-05, "loss": 0.641150176525116, "step": 9669 }, { "epoch": 11.865030674846626, "grad_norm": 0.2511957287788391, "learning_rate": 1.90692290931413e-05, "loss": 0.8125640153884888, "step": 9670 }, { "epoch": 11.866257668711656, "grad_norm": 0.29988694190979004, "learning_rate": 1.9064353333072154e-05, "loss": 0.5450963973999023, "step": 9671 }, { "epoch": 11.867484662576688, "grad_norm": 0.252961665391922, "learning_rate": 1.9059477812251985e-05, "loss": 0.5717439651489258, "step": 9672 }, { "epoch": 11.868711656441718, "grad_norm": 0.24745167791843414, "learning_rate": 1.9054602530877328e-05, "loss": 0.7649855017662048, "step": 9673 }, { "epoch": 11.869938650306748, "grad_norm": 0.25464653968811035, "learning_rate": 1.9049727489144687e-05, "loss": 0.7520514726638794, "step": 9674 }, { "epoch": 11.871165644171779, "grad_norm": 0.2886866331100464, "learning_rate": 1.904485268725056e-05, "loss": 0.5783470869064331, "step": 9675 }, { "epoch": 11.87239263803681, "grad_norm": 0.2499150037765503, "learning_rate": 1.9039978125391438e-05, "loss": 0.6206972002983093, "step": 9676 }, { "epoch": 11.87361963190184, "grad_norm": 0.31912463903427124, "learning_rate": 1.9035103803763792e-05, "loss": 0.4276407063007355, "step": 9677 }, { "epoch": 11.874846625766871, "grad_norm": 0.2561988830566406, "learning_rate": 1.9030229722564104e-05, "loss": 0.6351442337036133, "step": 9678 }, { "epoch": 11.876073619631901, "grad_norm": 0.31382375955581665, "learning_rate": 1.902535588198883e-05, "loss": 0.6251417994499207, "step": 9679 }, { "epoch": 11.877300613496933, "grad_norm": 0.33618152141571045, "learning_rate": 1.9020482282234417e-05, "loss": 0.5872153043746948, "step": 9680 }, { "epoch": 11.878527607361963, "grad_norm": 0.6443133354187012, "learning_rate": 1.9015608923497315e-05, "loss": 0.5460496544837952, "step": 9681 }, { "epoch": 11.879754601226994, "grad_norm": 0.2371283918619156, "learning_rate": 1.9010735805973943e-05, "loss": 0.728348970413208, "step": 9682 }, { "epoch": 11.880981595092024, "grad_norm": 0.3615013360977173, "learning_rate": 1.9005862929860728e-05, "loss": 0.5691897869110107, "step": 9683 }, { "epoch": 11.882208588957056, "grad_norm": 0.3121557831764221, "learning_rate": 1.900099029535409e-05, "loss": 0.6234989166259766, "step": 9684 }, { "epoch": 11.883435582822086, "grad_norm": 0.23413369059562683, "learning_rate": 1.8996117902650417e-05, "loss": 0.5604122877120972, "step": 9685 }, { "epoch": 11.884662576687116, "grad_norm": 0.36876752972602844, "learning_rate": 1.8991245751946108e-05, "loss": 0.6640349626541138, "step": 9686 }, { "epoch": 11.885889570552147, "grad_norm": 0.2730715274810791, "learning_rate": 1.898637384343755e-05, "loss": 0.7617384195327759, "step": 9687 }, { "epoch": 11.887116564417179, "grad_norm": 0.3023970127105713, "learning_rate": 1.898150217732111e-05, "loss": 0.6588196754455566, "step": 9688 }, { "epoch": 11.888343558282209, "grad_norm": 0.30395907163619995, "learning_rate": 1.897663075379314e-05, "loss": 0.7136653661727905, "step": 9689 }, { "epoch": 11.889570552147239, "grad_norm": 0.22905610501766205, "learning_rate": 1.8971759573050015e-05, "loss": 0.580471396446228, "step": 9690 }, { "epoch": 11.89079754601227, "grad_norm": 0.2564905881881714, "learning_rate": 1.896688863528807e-05, "loss": 0.7789915800094604, "step": 9691 }, { "epoch": 11.892024539877301, "grad_norm": 0.2665156126022339, "learning_rate": 1.8962017940703636e-05, "loss": 0.7402772903442383, "step": 9692 }, { "epoch": 11.893251533742331, "grad_norm": 0.32029247283935547, "learning_rate": 1.895714748949304e-05, "loss": 0.6171879768371582, "step": 9693 }, { "epoch": 11.894478527607362, "grad_norm": 0.22018110752105713, "learning_rate": 1.895227728185259e-05, "loss": 0.7142203450202942, "step": 9694 }, { "epoch": 11.895705521472392, "grad_norm": 0.30135729908943176, "learning_rate": 1.8947407317978594e-05, "loss": 0.6802735924720764, "step": 9695 }, { "epoch": 11.896932515337424, "grad_norm": 0.27160659432411194, "learning_rate": 1.894253759806735e-05, "loss": 0.5878649950027466, "step": 9696 }, { "epoch": 11.898159509202454, "grad_norm": 0.24887189269065857, "learning_rate": 1.8937668122315138e-05, "loss": 0.680994987487793, "step": 9697 }, { "epoch": 11.899386503067484, "grad_norm": 0.2866216003894806, "learning_rate": 1.8932798890918236e-05, "loss": 0.6232137084007263, "step": 9698 }, { "epoch": 11.900613496932515, "grad_norm": 0.26977407932281494, "learning_rate": 1.8927929904072903e-05, "loss": 0.5970661044120789, "step": 9699 }, { "epoch": 11.901840490797547, "grad_norm": 0.31730160117149353, "learning_rate": 1.8923061161975395e-05, "loss": 0.5882656574249268, "step": 9700 }, { "epoch": 11.903067484662577, "grad_norm": 0.23186412453651428, "learning_rate": 1.8918192664821967e-05, "loss": 0.6055925488471985, "step": 9701 }, { "epoch": 11.904294478527607, "grad_norm": 0.270039439201355, "learning_rate": 1.891332441280884e-05, "loss": 0.6509403586387634, "step": 9702 }, { "epoch": 11.905521472392637, "grad_norm": 0.3223806619644165, "learning_rate": 1.890845640613225e-05, "loss": 0.5955019593238831, "step": 9703 }, { "epoch": 11.90674846625767, "grad_norm": 0.2797326147556305, "learning_rate": 1.8903588644988407e-05, "loss": 0.6862818002700806, "step": 9704 }, { "epoch": 11.9079754601227, "grad_norm": 0.29990261793136597, "learning_rate": 1.8898721129573517e-05, "loss": 0.7204534411430359, "step": 9705 }, { "epoch": 11.90920245398773, "grad_norm": 0.28909537196159363, "learning_rate": 1.8893853860083784e-05, "loss": 0.6577056646347046, "step": 9706 }, { "epoch": 11.91042944785276, "grad_norm": 0.27590662240982056, "learning_rate": 1.8888986836715378e-05, "loss": 0.7605534791946411, "step": 9707 }, { "epoch": 11.911656441717792, "grad_norm": 0.30978548526763916, "learning_rate": 1.8884120059664488e-05, "loss": 0.406975656747818, "step": 9708 }, { "epoch": 11.912883435582822, "grad_norm": 0.23306098580360413, "learning_rate": 1.887925352912727e-05, "loss": 0.6524646282196045, "step": 9709 }, { "epoch": 11.914110429447852, "grad_norm": 0.21870733797550201, "learning_rate": 1.887438724529989e-05, "loss": 0.6589357852935791, "step": 9710 }, { "epoch": 11.915337423312884, "grad_norm": 0.3313468396663666, "learning_rate": 1.886952120837848e-05, "loss": 0.6079100370407104, "step": 9711 }, { "epoch": 11.916564417177915, "grad_norm": 0.2722230851650238, "learning_rate": 1.8864655418559196e-05, "loss": 0.5320117473602295, "step": 9712 }, { "epoch": 11.917791411042945, "grad_norm": 0.2612296938896179, "learning_rate": 1.885978987603815e-05, "loss": 0.5829370021820068, "step": 9713 }, { "epoch": 11.919018404907975, "grad_norm": 0.31978124380111694, "learning_rate": 1.8854924581011463e-05, "loss": 0.6415319442749023, "step": 9714 }, { "epoch": 11.920245398773005, "grad_norm": 0.3738347887992859, "learning_rate": 1.8850059533675242e-05, "loss": 0.6395041942596436, "step": 9715 }, { "epoch": 11.921472392638037, "grad_norm": 0.2845354676246643, "learning_rate": 1.8845194734225578e-05, "loss": 0.6107839345932007, "step": 9716 }, { "epoch": 11.922699386503067, "grad_norm": 0.24519862234592438, "learning_rate": 1.8840330182858566e-05, "loss": 0.8132548332214355, "step": 9717 }, { "epoch": 11.923926380368098, "grad_norm": 0.30574116110801697, "learning_rate": 1.8835465879770266e-05, "loss": 0.4082220792770386, "step": 9718 }, { "epoch": 11.92515337423313, "grad_norm": 0.23067162930965424, "learning_rate": 1.883060182515676e-05, "loss": 0.7196015119552612, "step": 9719 }, { "epoch": 11.92638036809816, "grad_norm": 0.31832996010780334, "learning_rate": 1.8825738019214104e-05, "loss": 0.40984708070755005, "step": 9720 }, { "epoch": 11.92760736196319, "grad_norm": 0.2828182280063629, "learning_rate": 1.882087446213833e-05, "loss": 0.6219784021377563, "step": 9721 }, { "epoch": 11.92883435582822, "grad_norm": 0.3917613923549652, "learning_rate": 1.881601115412549e-05, "loss": 0.4658561944961548, "step": 9722 }, { "epoch": 11.93006134969325, "grad_norm": 0.2682655453681946, "learning_rate": 1.88111480953716e-05, "loss": 0.708956241607666, "step": 9723 }, { "epoch": 11.931288343558283, "grad_norm": 0.29780492186546326, "learning_rate": 1.8806285286072675e-05, "loss": 0.6566009521484375, "step": 9724 }, { "epoch": 11.932515337423313, "grad_norm": 0.2609440088272095, "learning_rate": 1.8801422726424732e-05, "loss": 0.6822017431259155, "step": 9725 }, { "epoch": 11.933742331288343, "grad_norm": 0.26899415254592896, "learning_rate": 1.8796560416623756e-05, "loss": 0.5192329287528992, "step": 9726 }, { "epoch": 11.934969325153375, "grad_norm": 0.28718727827072144, "learning_rate": 1.879169835686574e-05, "loss": 0.7788536548614502, "step": 9727 }, { "epoch": 11.936196319018405, "grad_norm": 0.2724403738975525, "learning_rate": 1.8786836547346652e-05, "loss": 0.8143839836120605, "step": 9728 }, { "epoch": 11.937423312883435, "grad_norm": 0.32199567556381226, "learning_rate": 1.8781974988262467e-05, "loss": 0.6843305826187134, "step": 9729 }, { "epoch": 11.938650306748466, "grad_norm": 0.315804660320282, "learning_rate": 1.8777113679809137e-05, "loss": 0.6993259191513062, "step": 9730 }, { "epoch": 11.939877300613498, "grad_norm": 0.3308227062225342, "learning_rate": 1.8772252622182602e-05, "loss": 0.7248374223709106, "step": 9731 }, { "epoch": 11.941104294478528, "grad_norm": 0.29246121644973755, "learning_rate": 1.8767391815578807e-05, "loss": 0.6693466901779175, "step": 9732 }, { "epoch": 11.942331288343558, "grad_norm": 0.2696435749530792, "learning_rate": 1.876253126019365e-05, "loss": 0.4759097993373871, "step": 9733 }, { "epoch": 11.943558282208588, "grad_norm": 0.24113403260707855, "learning_rate": 1.8757670956223093e-05, "loss": 0.5692168474197388, "step": 9734 }, { "epoch": 11.94478527607362, "grad_norm": 0.25449618697166443, "learning_rate": 1.8752810903863005e-05, "loss": 0.6881263256072998, "step": 9735 }, { "epoch": 11.94601226993865, "grad_norm": 0.2918258011341095, "learning_rate": 1.87479511033093e-05, "loss": 0.6018540859222412, "step": 9736 }, { "epoch": 11.94723926380368, "grad_norm": 0.2970416843891144, "learning_rate": 1.8743091554757848e-05, "loss": 0.561610221862793, "step": 9737 }, { "epoch": 11.948466257668711, "grad_norm": 0.2962806224822998, "learning_rate": 1.873823225840453e-05, "loss": 0.5852125883102417, "step": 9738 }, { "epoch": 11.949693251533743, "grad_norm": 0.26593875885009766, "learning_rate": 1.8733373214445217e-05, "loss": 0.6646654009819031, "step": 9739 }, { "epoch": 11.950920245398773, "grad_norm": 0.2746818959712982, "learning_rate": 1.8728514423075753e-05, "loss": 0.6761110424995422, "step": 9740 }, { "epoch": 11.952147239263804, "grad_norm": 0.28022947907447815, "learning_rate": 1.8723655884491982e-05, "loss": 0.4916217029094696, "step": 9741 }, { "epoch": 11.953374233128834, "grad_norm": 0.255948543548584, "learning_rate": 1.871879759888975e-05, "loss": 0.6213390827178955, "step": 9742 }, { "epoch": 11.954601226993866, "grad_norm": 0.24906516075134277, "learning_rate": 1.8713939566464868e-05, "loss": 0.7446380853652954, "step": 9743 }, { "epoch": 11.955828220858896, "grad_norm": 0.27522146701812744, "learning_rate": 1.8709081787413152e-05, "loss": 0.47239047288894653, "step": 9744 }, { "epoch": 11.957055214723926, "grad_norm": 0.24519963562488556, "learning_rate": 1.870422426193041e-05, "loss": 0.6044414639472961, "step": 9745 }, { "epoch": 11.958282208588956, "grad_norm": 0.31124067306518555, "learning_rate": 1.8699366990212425e-05, "loss": 0.6485280394554138, "step": 9746 }, { "epoch": 11.959509202453988, "grad_norm": 0.26828041672706604, "learning_rate": 1.8694509972454993e-05, "loss": 0.6538652181625366, "step": 9747 }, { "epoch": 11.960736196319019, "grad_norm": 0.3289056420326233, "learning_rate": 1.868965320885388e-05, "loss": 0.7523958086967468, "step": 9748 }, { "epoch": 11.961963190184049, "grad_norm": 0.2631867825984955, "learning_rate": 1.868479669960485e-05, "loss": 0.6133990287780762, "step": 9749 }, { "epoch": 11.963190184049079, "grad_norm": 0.2412862926721573, "learning_rate": 1.867994044490365e-05, "loss": 0.7098823189735413, "step": 9750 }, { "epoch": 11.964417177914111, "grad_norm": 0.3219418227672577, "learning_rate": 1.8675084444946024e-05, "loss": 0.5026772618293762, "step": 9751 }, { "epoch": 11.965644171779141, "grad_norm": 0.3596903085708618, "learning_rate": 1.867022869992771e-05, "loss": 0.7310344576835632, "step": 9752 }, { "epoch": 11.966871165644172, "grad_norm": 0.23626618087291718, "learning_rate": 1.8665373210044417e-05, "loss": 0.7549744248390198, "step": 9753 }, { "epoch": 11.968098159509202, "grad_norm": 0.23478518426418304, "learning_rate": 1.866051797549187e-05, "loss": 0.6123968958854675, "step": 9754 }, { "epoch": 11.969325153374234, "grad_norm": 0.31054890155792236, "learning_rate": 1.865566299646576e-05, "loss": 0.6046900153160095, "step": 9755 }, { "epoch": 11.970552147239264, "grad_norm": 0.23373043537139893, "learning_rate": 1.8650808273161772e-05, "loss": 0.6809908151626587, "step": 9756 }, { "epoch": 11.971779141104294, "grad_norm": 0.3125699758529663, "learning_rate": 1.8645953805775597e-05, "loss": 0.774889349937439, "step": 9757 }, { "epoch": 11.973006134969324, "grad_norm": 0.27843353152275085, "learning_rate": 1.8641099594502912e-05, "loss": 0.447496235370636, "step": 9758 }, { "epoch": 11.974233128834356, "grad_norm": 0.24901564419269562, "learning_rate": 1.8636245639539358e-05, "loss": 0.5996526479721069, "step": 9759 }, { "epoch": 11.975460122699387, "grad_norm": 0.2219700664281845, "learning_rate": 1.8631391941080596e-05, "loss": 0.43525105714797974, "step": 9760 }, { "epoch": 11.976687116564417, "grad_norm": 0.24435971677303314, "learning_rate": 1.862653849932226e-05, "loss": 0.5630224347114563, "step": 9761 }, { "epoch": 11.977914110429447, "grad_norm": 0.28573310375213623, "learning_rate": 1.8621685314459976e-05, "loss": 0.4804995656013489, "step": 9762 }, { "epoch": 11.979141104294479, "grad_norm": 0.28220489621162415, "learning_rate": 1.8616832386689377e-05, "loss": 0.549595057964325, "step": 9763 }, { "epoch": 11.98036809815951, "grad_norm": 0.2429065853357315, "learning_rate": 1.8611979716206045e-05, "loss": 0.5768333673477173, "step": 9764 }, { "epoch": 11.98159509202454, "grad_norm": 0.3052835762500763, "learning_rate": 1.8607127303205595e-05, "loss": 0.5181483030319214, "step": 9765 }, { "epoch": 11.98282208588957, "grad_norm": 0.25704437494277954, "learning_rate": 1.860227514788362e-05, "loss": 0.6291012763977051, "step": 9766 }, { "epoch": 11.984049079754602, "grad_norm": 0.2679949104785919, "learning_rate": 1.8597423250435674e-05, "loss": 0.5290969014167786, "step": 9767 }, { "epoch": 11.985276073619632, "grad_norm": 0.230793759226799, "learning_rate": 1.8592571611057347e-05, "loss": 0.6426061391830444, "step": 9768 }, { "epoch": 11.986503067484662, "grad_norm": 0.28903934359550476, "learning_rate": 1.8587720229944176e-05, "loss": 0.8548725843429565, "step": 9769 }, { "epoch": 11.987730061349692, "grad_norm": 0.23136258125305176, "learning_rate": 1.8582869107291715e-05, "loss": 0.8090095520019531, "step": 9770 }, { "epoch": 11.988957055214724, "grad_norm": 0.2725217640399933, "learning_rate": 1.8578018243295503e-05, "loss": 0.7200582027435303, "step": 9771 }, { "epoch": 11.990184049079755, "grad_norm": 0.24154940247535706, "learning_rate": 1.8573167638151055e-05, "loss": 0.5761889219284058, "step": 9772 }, { "epoch": 11.991411042944785, "grad_norm": 0.26398345828056335, "learning_rate": 1.8568317292053894e-05, "loss": 0.49348634481430054, "step": 9773 }, { "epoch": 11.992638036809815, "grad_norm": 0.22925396263599396, "learning_rate": 1.8563467205199514e-05, "loss": 0.6109166741371155, "step": 9774 }, { "epoch": 11.993865030674847, "grad_norm": 0.3086511492729187, "learning_rate": 1.8558617377783412e-05, "loss": 0.5468834638595581, "step": 9775 }, { "epoch": 11.995092024539877, "grad_norm": 0.2354152500629425, "learning_rate": 1.855376781000108e-05, "loss": 0.7725341320037842, "step": 9776 }, { "epoch": 11.996319018404908, "grad_norm": 0.34505271911621094, "learning_rate": 1.8548918502047975e-05, "loss": 0.5818003416061401, "step": 9777 }, { "epoch": 11.99754601226994, "grad_norm": 0.34123605489730835, "learning_rate": 1.854406945411956e-05, "loss": 0.5853168368339539, "step": 9778 }, { "epoch": 11.99877300613497, "grad_norm": 0.33887895941734314, "learning_rate": 1.85392206664113e-05, "loss": 0.5731024742126465, "step": 9779 }, { "epoch": 12.0, "grad_norm": 0.3070206046104431, "learning_rate": 1.8534372139118633e-05, "loss": 0.4902607798576355, "step": 9780 }, { "epoch": 12.00122699386503, "grad_norm": 0.2025728076696396, "learning_rate": 1.852952387243698e-05, "loss": 0.535952627658844, "step": 9781 }, { "epoch": 12.002453987730062, "grad_norm": 0.2822614312171936, "learning_rate": 1.852467586656177e-05, "loss": 0.6995570659637451, "step": 9782 }, { "epoch": 12.003680981595092, "grad_norm": 0.2534826099872589, "learning_rate": 1.8519828121688404e-05, "loss": 0.7312007546424866, "step": 9783 }, { "epoch": 12.004907975460123, "grad_norm": 0.22575607895851135, "learning_rate": 1.851498063801228e-05, "loss": 0.6288744807243347, "step": 9784 }, { "epoch": 12.006134969325153, "grad_norm": 0.2602618634700775, "learning_rate": 1.8510133415728796e-05, "loss": 0.6127326488494873, "step": 9785 }, { "epoch": 12.007361963190185, "grad_norm": 0.2449302226305008, "learning_rate": 1.8505286455033325e-05, "loss": 0.6574564576148987, "step": 9786 }, { "epoch": 12.008588957055215, "grad_norm": 0.2788800895214081, "learning_rate": 1.8500439756121236e-05, "loss": 0.6508133411407471, "step": 9787 }, { "epoch": 12.009815950920245, "grad_norm": 0.2566382884979248, "learning_rate": 1.8495593319187877e-05, "loss": 0.6546616554260254, "step": 9788 }, { "epoch": 12.011042944785276, "grad_norm": 0.23993884027004242, "learning_rate": 1.84907471444286e-05, "loss": 0.832260012626648, "step": 9789 }, { "epoch": 12.012269938650308, "grad_norm": 0.2775382101535797, "learning_rate": 1.8485901232038745e-05, "loss": 0.44551175832748413, "step": 9790 }, { "epoch": 12.013496932515338, "grad_norm": 0.2708803117275238, "learning_rate": 1.848105558221363e-05, "loss": 0.621965765953064, "step": 9791 }, { "epoch": 12.014723926380368, "grad_norm": 0.24404160678386688, "learning_rate": 1.847621019514857e-05, "loss": 0.6751723885536194, "step": 9792 }, { "epoch": 12.015950920245398, "grad_norm": 0.2890763282775879, "learning_rate": 1.8471365071038876e-05, "loss": 0.45791637897491455, "step": 9793 }, { "epoch": 12.01717791411043, "grad_norm": 0.28453338146209717, "learning_rate": 1.846652021007983e-05, "loss": 0.7129330635070801, "step": 9794 }, { "epoch": 12.01840490797546, "grad_norm": 0.29635024070739746, "learning_rate": 1.8461675612466727e-05, "loss": 0.41921699047088623, "step": 9795 }, { "epoch": 12.01963190184049, "grad_norm": 0.2860238552093506, "learning_rate": 1.8456831278394828e-05, "loss": 0.725956916809082, "step": 9796 }, { "epoch": 12.020858895705521, "grad_norm": 0.20991384983062744, "learning_rate": 1.8451987208059393e-05, "loss": 0.6335127353668213, "step": 9797 }, { "epoch": 12.022085889570553, "grad_norm": 0.2621500790119171, "learning_rate": 1.8447143401655688e-05, "loss": 0.600813627243042, "step": 9798 }, { "epoch": 12.023312883435583, "grad_norm": 0.2975230813026428, "learning_rate": 1.8442299859378935e-05, "loss": 0.6710259914398193, "step": 9799 }, { "epoch": 12.024539877300613, "grad_norm": 0.249288871884346, "learning_rate": 1.8437456581424368e-05, "loss": 0.6861991882324219, "step": 9800 }, { "epoch": 12.025766871165644, "grad_norm": 0.2616172730922699, "learning_rate": 1.8432613567987215e-05, "loss": 0.6871270537376404, "step": 9801 }, { "epoch": 12.026993865030676, "grad_norm": 0.26144757866859436, "learning_rate": 1.8427770819262685e-05, "loss": 0.7590399980545044, "step": 9802 }, { "epoch": 12.028220858895706, "grad_norm": 0.35050418972969055, "learning_rate": 1.842292833544596e-05, "loss": 0.49932974576950073, "step": 9803 }, { "epoch": 12.029447852760736, "grad_norm": 0.3005865812301636, "learning_rate": 1.841808611673224e-05, "loss": 0.5832970142364502, "step": 9804 }, { "epoch": 12.030674846625766, "grad_norm": 0.2551955282688141, "learning_rate": 1.8413244163316696e-05, "loss": 0.7337114810943604, "step": 9805 }, { "epoch": 12.031901840490798, "grad_norm": 0.7553601264953613, "learning_rate": 1.8408402475394494e-05, "loss": 0.6036497354507446, "step": 9806 }, { "epoch": 12.033128834355828, "grad_norm": 0.2765858769416809, "learning_rate": 1.840356105316079e-05, "loss": 0.6743725538253784, "step": 9807 }, { "epoch": 12.034355828220859, "grad_norm": 0.21093113720417023, "learning_rate": 1.839871989681072e-05, "loss": 0.6192890405654907, "step": 9808 }, { "epoch": 12.035582822085889, "grad_norm": 0.22002755105495453, "learning_rate": 1.839387900653943e-05, "loss": 0.4595196843147278, "step": 9809 }, { "epoch": 12.036809815950921, "grad_norm": 0.31265726685523987, "learning_rate": 1.838903838254203e-05, "loss": 0.7572320699691772, "step": 9810 }, { "epoch": 12.038036809815951, "grad_norm": 0.3273557424545288, "learning_rate": 1.8384198025013642e-05, "loss": 0.5793870091438293, "step": 9811 }, { "epoch": 12.039263803680981, "grad_norm": 0.2648318409919739, "learning_rate": 1.8379357934149366e-05, "loss": 0.7065339088439941, "step": 9812 }, { "epoch": 12.040490797546012, "grad_norm": 0.2703879475593567, "learning_rate": 1.8374518110144283e-05, "loss": 0.6017889380455017, "step": 9813 }, { "epoch": 12.041717791411044, "grad_norm": 0.2803374230861664, "learning_rate": 1.8369678553193482e-05, "loss": 0.7497275471687317, "step": 9814 }, { "epoch": 12.042944785276074, "grad_norm": 0.2989960014820099, "learning_rate": 1.836483926349202e-05, "loss": 0.7439690232276917, "step": 9815 }, { "epoch": 12.044171779141104, "grad_norm": 0.2747398614883423, "learning_rate": 1.8360000241234974e-05, "loss": 0.642951250076294, "step": 9816 }, { "epoch": 12.045398773006134, "grad_norm": 0.29543814063072205, "learning_rate": 1.835516148661738e-05, "loss": 0.36943620443344116, "step": 9817 }, { "epoch": 12.046625766871166, "grad_norm": 0.28552648425102234, "learning_rate": 1.835032299983427e-05, "loss": 0.5558348894119263, "step": 9818 }, { "epoch": 12.047852760736196, "grad_norm": 0.3225303590297699, "learning_rate": 1.8345484781080674e-05, "loss": 0.699150800704956, "step": 9819 }, { "epoch": 12.049079754601227, "grad_norm": 0.2086479514837265, "learning_rate": 1.8340646830551608e-05, "loss": 0.6376171112060547, "step": 9820 }, { "epoch": 12.050306748466257, "grad_norm": 0.23601046204566956, "learning_rate": 1.8335809148442073e-05, "loss": 0.5446357727050781, "step": 9821 }, { "epoch": 12.051533742331289, "grad_norm": 0.29918915033340454, "learning_rate": 1.833097173494706e-05, "loss": 0.560728132724762, "step": 9822 }, { "epoch": 12.05276073619632, "grad_norm": 0.22185608744621277, "learning_rate": 1.8326134590261567e-05, "loss": 0.6712278127670288, "step": 9823 }, { "epoch": 12.05398773006135, "grad_norm": 0.33130940794944763, "learning_rate": 1.8321297714580547e-05, "loss": 0.5737951993942261, "step": 9824 }, { "epoch": 12.05521472392638, "grad_norm": 0.2914541959762573, "learning_rate": 1.831646110809897e-05, "loss": 0.7186314463615417, "step": 9825 }, { "epoch": 12.056441717791412, "grad_norm": 0.2593144476413727, "learning_rate": 1.8311624771011786e-05, "loss": 0.6834731101989746, "step": 9826 }, { "epoch": 12.057668711656442, "grad_norm": 0.24560965597629547, "learning_rate": 1.8306788703513934e-05, "loss": 0.6540240049362183, "step": 9827 }, { "epoch": 12.058895705521472, "grad_norm": 0.2663043439388275, "learning_rate": 1.8301952905800334e-05, "loss": 0.7472661137580872, "step": 9828 }, { "epoch": 12.060122699386502, "grad_norm": 0.23964707553386688, "learning_rate": 1.829711737806591e-05, "loss": 0.6566770672798157, "step": 9829 }, { "epoch": 12.061349693251534, "grad_norm": 0.2585977613925934, "learning_rate": 1.8292282120505567e-05, "loss": 0.48894190788269043, "step": 9830 }, { "epoch": 12.062576687116565, "grad_norm": 0.2784472405910492, "learning_rate": 1.8287447133314206e-05, "loss": 0.6418818235397339, "step": 9831 }, { "epoch": 12.063803680981595, "grad_norm": 0.23967662453651428, "learning_rate": 1.82826124166867e-05, "loss": 0.6528837084770203, "step": 9832 }, { "epoch": 12.065030674846625, "grad_norm": 0.2740848660469055, "learning_rate": 1.8277777970817932e-05, "loss": 0.6280186772346497, "step": 9833 }, { "epoch": 12.066257668711657, "grad_norm": 0.27933788299560547, "learning_rate": 1.8272943795902762e-05, "loss": 0.5235129594802856, "step": 9834 }, { "epoch": 12.067484662576687, "grad_norm": 0.24364353716373444, "learning_rate": 1.8268109892136036e-05, "loss": 0.5508551597595215, "step": 9835 }, { "epoch": 12.068711656441717, "grad_norm": 0.24154290556907654, "learning_rate": 1.8263276259712607e-05, "loss": 0.5928919911384583, "step": 9836 }, { "epoch": 12.069938650306748, "grad_norm": 0.22057336568832397, "learning_rate": 1.8258442898827297e-05, "loss": 0.6036928296089172, "step": 9837 }, { "epoch": 12.07116564417178, "grad_norm": 0.2594231367111206, "learning_rate": 1.8253609809674927e-05, "loss": 0.7241388559341431, "step": 9838 }, { "epoch": 12.07239263803681, "grad_norm": 0.27314504981040955, "learning_rate": 1.82487769924503e-05, "loss": 0.4374443590641022, "step": 9839 }, { "epoch": 12.07361963190184, "grad_norm": 0.40274229645729065, "learning_rate": 1.8243944447348217e-05, "loss": 0.3746393322944641, "step": 9840 }, { "epoch": 12.07484662576687, "grad_norm": 0.2428160160779953, "learning_rate": 1.823911217456347e-05, "loss": 0.7897061109542847, "step": 9841 }, { "epoch": 12.076073619631902, "grad_norm": 0.3014833629131317, "learning_rate": 1.8234280174290823e-05, "loss": 0.5341664552688599, "step": 9842 }, { "epoch": 12.077300613496933, "grad_norm": 0.2402293086051941, "learning_rate": 1.822944844672505e-05, "loss": 0.7633981704711914, "step": 9843 }, { "epoch": 12.078527607361963, "grad_norm": 0.2764407992362976, "learning_rate": 1.822461699206089e-05, "loss": 0.5184453725814819, "step": 9844 }, { "epoch": 12.079754601226995, "grad_norm": 0.2480444610118866, "learning_rate": 1.8219785810493105e-05, "loss": 0.4409443140029907, "step": 9845 }, { "epoch": 12.080981595092025, "grad_norm": 0.2746247947216034, "learning_rate": 1.8214954902216414e-05, "loss": 0.47997015714645386, "step": 9846 }, { "epoch": 12.082208588957055, "grad_norm": 0.2628289759159088, "learning_rate": 1.821012426742554e-05, "loss": 0.6516435146331787, "step": 9847 }, { "epoch": 12.083435582822085, "grad_norm": 0.25879305601119995, "learning_rate": 1.820529390631519e-05, "loss": 0.611544132232666, "step": 9848 }, { "epoch": 12.084662576687117, "grad_norm": 0.27154383063316345, "learning_rate": 1.8200463819080066e-05, "loss": 0.6046368479728699, "step": 9849 }, { "epoch": 12.085889570552148, "grad_norm": 0.27448034286499023, "learning_rate": 1.8195634005914854e-05, "loss": 0.5580927133560181, "step": 9850 }, { "epoch": 12.087116564417178, "grad_norm": 0.24307462573051453, "learning_rate": 1.819080446701422e-05, "loss": 0.4813350439071655, "step": 9851 }, { "epoch": 12.088343558282208, "grad_norm": 0.2561519145965576, "learning_rate": 1.8185975202572845e-05, "loss": 0.5564861297607422, "step": 9852 }, { "epoch": 12.08957055214724, "grad_norm": 0.2769668400287628, "learning_rate": 1.8181146212785376e-05, "loss": 0.6294400691986084, "step": 9853 }, { "epoch": 12.09079754601227, "grad_norm": 0.24007360637187958, "learning_rate": 1.817631749784645e-05, "loss": 0.6139334440231323, "step": 9854 }, { "epoch": 12.0920245398773, "grad_norm": 0.24943391978740692, "learning_rate": 1.8171489057950715e-05, "loss": 0.42264115810394287, "step": 9855 }, { "epoch": 12.09325153374233, "grad_norm": 0.3262324929237366, "learning_rate": 1.8166660893292768e-05, "loss": 0.4893662929534912, "step": 9856 }, { "epoch": 12.094478527607363, "grad_norm": 0.2671809494495392, "learning_rate": 1.8161833004067232e-05, "loss": 0.7153330445289612, "step": 9857 }, { "epoch": 12.095705521472393, "grad_norm": 0.2983618378639221, "learning_rate": 1.8157005390468714e-05, "loss": 0.7082103490829468, "step": 9858 }, { "epoch": 12.096932515337423, "grad_norm": 0.2299617975950241, "learning_rate": 1.815217805269178e-05, "loss": 0.7435087561607361, "step": 9859 }, { "epoch": 12.098159509202453, "grad_norm": 0.24909931421279907, "learning_rate": 1.814735099093103e-05, "loss": 0.8331875801086426, "step": 9860 }, { "epoch": 12.099386503067485, "grad_norm": 0.3131664991378784, "learning_rate": 1.814252420538101e-05, "loss": 0.7088756561279297, "step": 9861 }, { "epoch": 12.100613496932516, "grad_norm": 0.3037678897380829, "learning_rate": 1.8137697696236277e-05, "loss": 0.4456717371940613, "step": 9862 }, { "epoch": 12.101840490797546, "grad_norm": 0.2537980079650879, "learning_rate": 1.813287146369139e-05, "loss": 0.4530465602874756, "step": 9863 }, { "epoch": 12.103067484662576, "grad_norm": 0.28019341826438904, "learning_rate": 1.812804550794086e-05, "loss": 0.5401835441589355, "step": 9864 }, { "epoch": 12.104294478527608, "grad_norm": 0.23976948857307434, "learning_rate": 1.8123219829179222e-05, "loss": 0.7113765478134155, "step": 9865 }, { "epoch": 12.105521472392638, "grad_norm": 0.2537147104740143, "learning_rate": 1.811839442760097e-05, "loss": 0.8241192102432251, "step": 9866 }, { "epoch": 12.106748466257669, "grad_norm": 0.26075154542922974, "learning_rate": 1.8113569303400613e-05, "loss": 0.6066641211509705, "step": 9867 }, { "epoch": 12.107975460122699, "grad_norm": 0.24351570010185242, "learning_rate": 1.8108744456772636e-05, "loss": 0.7860256433486938, "step": 9868 }, { "epoch": 12.10920245398773, "grad_norm": 0.2371823936700821, "learning_rate": 1.8103919887911526e-05, "loss": 0.5530811548233032, "step": 9869 }, { "epoch": 12.110429447852761, "grad_norm": 0.23218442499637604, "learning_rate": 1.809909559701173e-05, "loss": 0.7296398878097534, "step": 9870 }, { "epoch": 12.111656441717791, "grad_norm": 0.26663732528686523, "learning_rate": 1.809427158426771e-05, "loss": 0.7337892055511475, "step": 9871 }, { "epoch": 12.112883435582821, "grad_norm": 0.1954319030046463, "learning_rate": 1.8089447849873908e-05, "loss": 0.49895352125167847, "step": 9872 }, { "epoch": 12.114110429447853, "grad_norm": 0.27997806668281555, "learning_rate": 1.808462439402475e-05, "loss": 0.6361831426620483, "step": 9873 }, { "epoch": 12.115337423312884, "grad_norm": 0.23772868514060974, "learning_rate": 1.8079801216914667e-05, "loss": 0.5275384783744812, "step": 9874 }, { "epoch": 12.116564417177914, "grad_norm": 0.22160561382770538, "learning_rate": 1.807497831873805e-05, "loss": 0.6278148293495178, "step": 9875 }, { "epoch": 12.117791411042944, "grad_norm": 0.25122278928756714, "learning_rate": 1.807015569968931e-05, "loss": 0.6718102097511292, "step": 9876 }, { "epoch": 12.119018404907976, "grad_norm": 0.2876792550086975, "learning_rate": 1.8065333359962834e-05, "loss": 0.43848174810409546, "step": 9877 }, { "epoch": 12.120245398773006, "grad_norm": 0.23801736533641815, "learning_rate": 1.8060511299752984e-05, "loss": 0.5475773811340332, "step": 9878 }, { "epoch": 12.121472392638037, "grad_norm": 0.25529778003692627, "learning_rate": 1.8055689519254138e-05, "loss": 0.6574859619140625, "step": 9879 }, { "epoch": 12.122699386503067, "grad_norm": 0.26040127873420715, "learning_rate": 1.8050868018660637e-05, "loss": 0.662485659122467, "step": 9880 }, { "epoch": 12.123926380368099, "grad_norm": 0.3299987316131592, "learning_rate": 1.8046046798166826e-05, "loss": 0.5111719965934753, "step": 9881 }, { "epoch": 12.125153374233129, "grad_norm": 0.24268721044063568, "learning_rate": 1.8041225857967043e-05, "loss": 0.4775053858757019, "step": 9882 }, { "epoch": 12.12638036809816, "grad_norm": 0.24685636162757874, "learning_rate": 1.8036405198255592e-05, "loss": 0.6197283267974854, "step": 9883 }, { "epoch": 12.12760736196319, "grad_norm": 0.2710878551006317, "learning_rate": 1.8031584819226792e-05, "loss": 0.6494911909103394, "step": 9884 }, { "epoch": 12.128834355828221, "grad_norm": 0.2685968577861786, "learning_rate": 1.802676472107493e-05, "loss": 0.7337403893470764, "step": 9885 }, { "epoch": 12.130061349693252, "grad_norm": 0.2861540615558624, "learning_rate": 1.8021944903994288e-05, "loss": 0.383455753326416, "step": 9886 }, { "epoch": 12.131288343558282, "grad_norm": 0.29326945543289185, "learning_rate": 1.8017125368179156e-05, "loss": 0.4424806833267212, "step": 9887 }, { "epoch": 12.132515337423312, "grad_norm": 0.3074653744697571, "learning_rate": 1.8012306113823776e-05, "loss": 0.5015331506729126, "step": 9888 }, { "epoch": 12.133742331288344, "grad_norm": 0.30391767621040344, "learning_rate": 1.8007487141122402e-05, "loss": 0.586094081401825, "step": 9889 }, { "epoch": 12.134969325153374, "grad_norm": 0.2835628390312195, "learning_rate": 1.8002668450269286e-05, "loss": 0.4970875084400177, "step": 9890 }, { "epoch": 12.136196319018405, "grad_norm": 0.2556532025337219, "learning_rate": 1.7997850041458654e-05, "loss": 0.7395627498626709, "step": 9891 }, { "epoch": 12.137423312883435, "grad_norm": 0.2479405403137207, "learning_rate": 1.7993031914884705e-05, "loss": 0.45360636711120605, "step": 9892 }, { "epoch": 12.138650306748467, "grad_norm": 0.3195740878582001, "learning_rate": 1.7988214070741664e-05, "loss": 0.4375007450580597, "step": 9893 }, { "epoch": 12.139877300613497, "grad_norm": 0.2566458284854889, "learning_rate": 1.798339650922371e-05, "loss": 0.7547483444213867, "step": 9894 }, { "epoch": 12.141104294478527, "grad_norm": 0.31213051080703735, "learning_rate": 1.7978579230525028e-05, "loss": 0.7143604755401611, "step": 9895 }, { "epoch": 12.142331288343557, "grad_norm": 0.2493138611316681, "learning_rate": 1.79737622348398e-05, "loss": 0.7947349548339844, "step": 9896 }, { "epoch": 12.14355828220859, "grad_norm": 0.21408037841320038, "learning_rate": 1.7968945522362172e-05, "loss": 0.6338511109352112, "step": 9897 }, { "epoch": 12.14478527607362, "grad_norm": 0.2581702470779419, "learning_rate": 1.7964129093286296e-05, "loss": 0.6445027589797974, "step": 9898 }, { "epoch": 12.14601226993865, "grad_norm": 0.2399553656578064, "learning_rate": 1.7959312947806312e-05, "loss": 0.6417017579078674, "step": 9899 }, { "epoch": 12.14723926380368, "grad_norm": 0.3012838363647461, "learning_rate": 1.795449708611634e-05, "loss": 0.7029995322227478, "step": 9900 }, { "epoch": 12.148466257668712, "grad_norm": 0.27931636571884155, "learning_rate": 1.79496815084105e-05, "loss": 0.5230838656425476, "step": 9901 }, { "epoch": 12.149693251533742, "grad_norm": 0.2230542153120041, "learning_rate": 1.7944866214882886e-05, "loss": 0.5874419212341309, "step": 9902 }, { "epoch": 12.150920245398773, "grad_norm": 0.2578567862510681, "learning_rate": 1.7940051205727593e-05, "loss": 0.7397985458374023, "step": 9903 }, { "epoch": 12.152147239263805, "grad_norm": 0.25639885663986206, "learning_rate": 1.7935236481138708e-05, "loss": 0.5628998875617981, "step": 9904 }, { "epoch": 12.153374233128835, "grad_norm": 0.2348751425743103, "learning_rate": 1.793042204131028e-05, "loss": 0.6003027558326721, "step": 9905 }, { "epoch": 12.154601226993865, "grad_norm": 0.24118076264858246, "learning_rate": 1.7925607886436384e-05, "loss": 0.5321888327598572, "step": 9906 }, { "epoch": 12.155828220858895, "grad_norm": 0.24861349165439606, "learning_rate": 1.7920794016711052e-05, "loss": 0.5913465023040771, "step": 9907 }, { "epoch": 12.157055214723927, "grad_norm": 0.2631952464580536, "learning_rate": 1.7915980432328325e-05, "loss": 0.6451666355133057, "step": 9908 }, { "epoch": 12.158282208588957, "grad_norm": 0.24986933171749115, "learning_rate": 1.7911167133482227e-05, "loss": 0.5728107690811157, "step": 9909 }, { "epoch": 12.159509202453988, "grad_norm": 0.2623046040534973, "learning_rate": 1.790635412036676e-05, "loss": 0.6327786445617676, "step": 9910 }, { "epoch": 12.160736196319018, "grad_norm": 0.29459449648857117, "learning_rate": 1.7901541393175913e-05, "loss": 0.7513449788093567, "step": 9911 }, { "epoch": 12.16196319018405, "grad_norm": 0.24730166792869568, "learning_rate": 1.7896728952103702e-05, "loss": 0.6057788729667664, "step": 9912 }, { "epoch": 12.16319018404908, "grad_norm": 0.2096785604953766, "learning_rate": 1.7891916797344088e-05, "loss": 0.5774545669555664, "step": 9913 }, { "epoch": 12.16441717791411, "grad_norm": 0.23303773999214172, "learning_rate": 1.788710492909103e-05, "loss": 0.5459635853767395, "step": 9914 }, { "epoch": 12.16564417177914, "grad_norm": 0.26818013191223145, "learning_rate": 1.7882293347538493e-05, "loss": 0.6642454862594604, "step": 9915 }, { "epoch": 12.166871165644173, "grad_norm": 0.2492551952600479, "learning_rate": 1.7877482052880406e-05, "loss": 0.7538043260574341, "step": 9916 }, { "epoch": 12.168098159509203, "grad_norm": 0.3043513000011444, "learning_rate": 1.7872671045310703e-05, "loss": 0.5974688529968262, "step": 9917 }, { "epoch": 12.169325153374233, "grad_norm": 0.2166983187198639, "learning_rate": 1.7867860325023306e-05, "loss": 0.5872923135757446, "step": 9918 }, { "epoch": 12.170552147239263, "grad_norm": 0.24850110709667206, "learning_rate": 1.7863049892212115e-05, "loss": 0.6502749919891357, "step": 9919 }, { "epoch": 12.171779141104295, "grad_norm": 0.3120362162590027, "learning_rate": 1.785823974707103e-05, "loss": 0.6235238909721375, "step": 9920 }, { "epoch": 12.173006134969325, "grad_norm": 0.22693561017513275, "learning_rate": 1.7853429889793925e-05, "loss": 0.8037881255149841, "step": 9921 }, { "epoch": 12.174233128834356, "grad_norm": 0.29113656282424927, "learning_rate": 1.784862032057468e-05, "loss": 0.588871955871582, "step": 9922 }, { "epoch": 12.175460122699386, "grad_norm": 0.28781190514564514, "learning_rate": 1.7843811039607163e-05, "loss": 0.7258619666099548, "step": 9923 }, { "epoch": 12.176687116564418, "grad_norm": 0.2842244803905487, "learning_rate": 1.7839002047085208e-05, "loss": 0.6956231594085693, "step": 9924 }, { "epoch": 12.177914110429448, "grad_norm": 0.26988038420677185, "learning_rate": 1.7834193343202653e-05, "loss": 0.6913471221923828, "step": 9925 }, { "epoch": 12.179141104294478, "grad_norm": 0.3113570213317871, "learning_rate": 1.7829384928153332e-05, "loss": 0.5676895380020142, "step": 9926 }, { "epoch": 12.180368098159509, "grad_norm": 0.3041812777519226, "learning_rate": 1.7824576802131046e-05, "loss": 0.5266377329826355, "step": 9927 }, { "epoch": 12.18159509202454, "grad_norm": 0.2654740512371063, "learning_rate": 1.7819768965329614e-05, "loss": 0.7701762318611145, "step": 9928 }, { "epoch": 12.18282208588957, "grad_norm": 0.2352747917175293, "learning_rate": 1.781496141794281e-05, "loss": 0.5607840418815613, "step": 9929 }, { "epoch": 12.184049079754601, "grad_norm": 0.2794991135597229, "learning_rate": 1.7810154160164422e-05, "loss": 0.7978973388671875, "step": 9930 }, { "epoch": 12.185276073619631, "grad_norm": 0.2930445969104767, "learning_rate": 1.780534719218821e-05, "loss": 0.6592078804969788, "step": 9931 }, { "epoch": 12.186503067484663, "grad_norm": 0.26395392417907715, "learning_rate": 1.780054051420793e-05, "loss": 0.6360665559768677, "step": 9932 }, { "epoch": 12.187730061349694, "grad_norm": 0.2824589014053345, "learning_rate": 1.7795734126417326e-05, "loss": 0.7252947092056274, "step": 9933 }, { "epoch": 12.188957055214724, "grad_norm": 0.28485533595085144, "learning_rate": 1.7790928029010143e-05, "loss": 0.5627976655960083, "step": 9934 }, { "epoch": 12.190184049079754, "grad_norm": 0.2717283368110657, "learning_rate": 1.7786122222180085e-05, "loss": 0.5878422260284424, "step": 9935 }, { "epoch": 12.191411042944786, "grad_norm": 0.34624406695365906, "learning_rate": 1.7781316706120864e-05, "loss": 0.4882428050041199, "step": 9936 }, { "epoch": 12.192638036809816, "grad_norm": 0.2562827169895172, "learning_rate": 1.7776511481026184e-05, "loss": 0.6498420834541321, "step": 9937 }, { "epoch": 12.193865030674846, "grad_norm": 0.2371635138988495, "learning_rate": 1.777170654708972e-05, "loss": 0.655094563961029, "step": 9938 }, { "epoch": 12.195092024539877, "grad_norm": 0.2684290111064911, "learning_rate": 1.7766901904505153e-05, "loss": 0.7304314374923706, "step": 9939 }, { "epoch": 12.196319018404909, "grad_norm": 0.22537477314472198, "learning_rate": 1.7762097553466136e-05, "loss": 0.6556706428527832, "step": 9940 }, { "epoch": 12.197546012269939, "grad_norm": 0.2706644833087921, "learning_rate": 1.7757293494166324e-05, "loss": 0.7795529365539551, "step": 9941 }, { "epoch": 12.198773006134969, "grad_norm": 0.22872290015220642, "learning_rate": 1.775248972679936e-05, "loss": 0.506952166557312, "step": 9942 }, { "epoch": 12.2, "grad_norm": 0.24434299767017365, "learning_rate": 1.774768625155886e-05, "loss": 0.6667752861976624, "step": 9943 }, { "epoch": 12.201226993865031, "grad_norm": 0.2590765058994293, "learning_rate": 1.7742883068638447e-05, "loss": 0.7780246138572693, "step": 9944 }, { "epoch": 12.202453987730062, "grad_norm": 0.23725154995918274, "learning_rate": 1.7738080178231715e-05, "loss": 0.5483522415161133, "step": 9945 }, { "epoch": 12.203680981595092, "grad_norm": 0.3150985538959503, "learning_rate": 1.7733277580532257e-05, "loss": 0.45785143971443176, "step": 9946 }, { "epoch": 12.204907975460122, "grad_norm": 0.26425886154174805, "learning_rate": 1.7728475275733663e-05, "loss": 0.5774816870689392, "step": 9947 }, { "epoch": 12.206134969325154, "grad_norm": 0.2750473916530609, "learning_rate": 1.7723673264029484e-05, "loss": 0.6251947283744812, "step": 9948 }, { "epoch": 12.207361963190184, "grad_norm": 0.2669443190097809, "learning_rate": 1.7718871545613283e-05, "loss": 0.6196791529655457, "step": 9949 }, { "epoch": 12.208588957055214, "grad_norm": 0.32757803797721863, "learning_rate": 1.7714070120678612e-05, "loss": 0.5095627307891846, "step": 9950 }, { "epoch": 12.209815950920245, "grad_norm": 0.2801523506641388, "learning_rate": 1.7709268989418985e-05, "loss": 0.568584680557251, "step": 9951 }, { "epoch": 12.211042944785277, "grad_norm": 0.25138697028160095, "learning_rate": 1.7704468152027935e-05, "loss": 0.5737891793251038, "step": 9952 }, { "epoch": 12.212269938650307, "grad_norm": 0.26865848898887634, "learning_rate": 1.7699667608698966e-05, "loss": 0.5363618731498718, "step": 9953 }, { "epoch": 12.213496932515337, "grad_norm": 0.24009299278259277, "learning_rate": 1.7694867359625573e-05, "loss": 0.4230051338672638, "step": 9954 }, { "epoch": 12.214723926380367, "grad_norm": 0.21289151906967163, "learning_rate": 1.769006740500125e-05, "loss": 0.6458077430725098, "step": 9955 }, { "epoch": 12.2159509202454, "grad_norm": 0.2733653783798218, "learning_rate": 1.7685267745019445e-05, "loss": 0.7037404179573059, "step": 9956 }, { "epoch": 12.21717791411043, "grad_norm": 0.2575635313987732, "learning_rate": 1.7680468379873643e-05, "loss": 0.6009310483932495, "step": 9957 }, { "epoch": 12.21840490797546, "grad_norm": 0.21204979717731476, "learning_rate": 1.7675669309757282e-05, "loss": 0.4634215533733368, "step": 9958 }, { "epoch": 12.21963190184049, "grad_norm": 0.32163888216018677, "learning_rate": 1.7670870534863808e-05, "loss": 0.6660972237586975, "step": 9959 }, { "epoch": 12.220858895705522, "grad_norm": 0.2099260538816452, "learning_rate": 1.7666072055386638e-05, "loss": 0.6636157631874084, "step": 9960 }, { "epoch": 12.222085889570552, "grad_norm": 0.232565239071846, "learning_rate": 1.7661273871519192e-05, "loss": 0.4498440623283386, "step": 9961 }, { "epoch": 12.223312883435582, "grad_norm": 0.28414493799209595, "learning_rate": 1.7656475983454855e-05, "loss": 0.7936923503875732, "step": 9962 }, { "epoch": 12.224539877300613, "grad_norm": 0.3230847716331482, "learning_rate": 1.765167839138703e-05, "loss": 0.4863039553165436, "step": 9963 }, { "epoch": 12.225766871165645, "grad_norm": 0.28763842582702637, "learning_rate": 1.7646881095509094e-05, "loss": 0.6109061241149902, "step": 9964 }, { "epoch": 12.226993865030675, "grad_norm": 0.27099910378456116, "learning_rate": 1.7642084096014405e-05, "loss": 0.6006396412849426, "step": 9965 }, { "epoch": 12.228220858895705, "grad_norm": 0.3488331139087677, "learning_rate": 1.7637287393096325e-05, "loss": 0.3976239562034607, "step": 9966 }, { "epoch": 12.229447852760735, "grad_norm": 0.2457742691040039, "learning_rate": 1.7632490986948184e-05, "loss": 0.5731558799743652, "step": 9967 }, { "epoch": 12.230674846625767, "grad_norm": 0.29482993483543396, "learning_rate": 1.7627694877763324e-05, "loss": 0.5953179597854614, "step": 9968 }, { "epoch": 12.231901840490798, "grad_norm": 0.29495006799697876, "learning_rate": 1.762289906573506e-05, "loss": 0.6670503616333008, "step": 9969 }, { "epoch": 12.233128834355828, "grad_norm": 0.28980326652526855, "learning_rate": 1.761810355105669e-05, "loss": 0.7038652896881104, "step": 9970 }, { "epoch": 12.23435582822086, "grad_norm": 0.25639358162879944, "learning_rate": 1.7613308333921518e-05, "loss": 0.6689528226852417, "step": 9971 }, { "epoch": 12.23558282208589, "grad_norm": 0.22851669788360596, "learning_rate": 1.7608513414522808e-05, "loss": 0.5303061008453369, "step": 9972 }, { "epoch": 12.23680981595092, "grad_norm": 0.3190580904483795, "learning_rate": 1.7603718793053846e-05, "loss": 0.2595544159412384, "step": 9973 }, { "epoch": 12.23803680981595, "grad_norm": 0.23347152769565582, "learning_rate": 1.7598924469707888e-05, "loss": 0.7133832573890686, "step": 9974 }, { "epoch": 12.239263803680982, "grad_norm": 0.31881746649742126, "learning_rate": 1.7594130444678176e-05, "loss": 0.6052122116088867, "step": 9975 }, { "epoch": 12.240490797546013, "grad_norm": 0.33599618077278137, "learning_rate": 1.7589336718157944e-05, "loss": 0.5375193953514099, "step": 9976 }, { "epoch": 12.241717791411043, "grad_norm": 0.30701616406440735, "learning_rate": 1.758454329034041e-05, "loss": 0.6001483201980591, "step": 9977 }, { "epoch": 12.242944785276073, "grad_norm": 0.23375175893306732, "learning_rate": 1.7579750161418778e-05, "loss": 0.5840659141540527, "step": 9978 }, { "epoch": 12.244171779141105, "grad_norm": 0.2690744698047638, "learning_rate": 1.7574957331586263e-05, "loss": 0.8595066070556641, "step": 9979 }, { "epoch": 12.245398773006135, "grad_norm": 0.24231255054473877, "learning_rate": 1.7570164801036044e-05, "loss": 0.6639364361763, "step": 9980 }, { "epoch": 12.246625766871166, "grad_norm": 0.29439374804496765, "learning_rate": 1.7565372569961287e-05, "loss": 0.5406255722045898, "step": 9981 }, { "epoch": 12.247852760736196, "grad_norm": 0.3113936483860016, "learning_rate": 1.756058063855516e-05, "loss": 0.6863690614700317, "step": 9982 }, { "epoch": 12.249079754601228, "grad_norm": 0.29005393385887146, "learning_rate": 1.755578900701081e-05, "loss": 0.5377461910247803, "step": 9983 }, { "epoch": 12.250306748466258, "grad_norm": 0.2554106116294861, "learning_rate": 1.755099767552137e-05, "loss": 0.7527409195899963, "step": 9984 }, { "epoch": 12.251533742331288, "grad_norm": 0.2324390858411789, "learning_rate": 1.7546206644279973e-05, "loss": 0.8521180152893066, "step": 9985 }, { "epoch": 12.252760736196318, "grad_norm": 0.26213666796684265, "learning_rate": 1.7541415913479723e-05, "loss": 0.5604529976844788, "step": 9986 }, { "epoch": 12.25398773006135, "grad_norm": 0.24120420217514038, "learning_rate": 1.7536625483313723e-05, "loss": 0.6356280446052551, "step": 9987 }, { "epoch": 12.25521472392638, "grad_norm": 0.2522071599960327, "learning_rate": 1.7531835353975075e-05, "loss": 0.548578679561615, "step": 9988 }, { "epoch": 12.256441717791411, "grad_norm": 0.263917475938797, "learning_rate": 1.7527045525656832e-05, "loss": 0.6757543087005615, "step": 9989 }, { "epoch": 12.257668711656441, "grad_norm": 0.24061816930770874, "learning_rate": 1.7522255998552078e-05, "loss": 0.5992058515548706, "step": 9990 }, { "epoch": 12.258895705521473, "grad_norm": 0.23655393719673157, "learning_rate": 1.7517466772853854e-05, "loss": 0.7845244407653809, "step": 9991 }, { "epoch": 12.260122699386503, "grad_norm": 0.33111459016799927, "learning_rate": 1.75126778487552e-05, "loss": 0.5927866697311401, "step": 9992 }, { "epoch": 12.261349693251534, "grad_norm": 0.2925342321395874, "learning_rate": 1.7507889226449155e-05, "loss": 0.5940933227539062, "step": 9993 }, { "epoch": 12.262576687116564, "grad_norm": 0.3515428602695465, "learning_rate": 1.7503100906128723e-05, "loss": 0.41211748123168945, "step": 9994 }, { "epoch": 12.263803680981596, "grad_norm": 0.23863694071769714, "learning_rate": 1.7498312887986907e-05, "loss": 0.89615797996521, "step": 9995 }, { "epoch": 12.265030674846626, "grad_norm": 0.26765334606170654, "learning_rate": 1.7493525172216712e-05, "loss": 0.6709370613098145, "step": 9996 }, { "epoch": 12.266257668711656, "grad_norm": 0.28375008702278137, "learning_rate": 1.7488737759011105e-05, "loss": 0.6381161212921143, "step": 9997 }, { "epoch": 12.267484662576686, "grad_norm": 0.2852858901023865, "learning_rate": 1.7483950648563055e-05, "loss": 0.6122692823410034, "step": 9998 }, { "epoch": 12.268711656441718, "grad_norm": 0.24440348148345947, "learning_rate": 1.7479163841065514e-05, "loss": 0.722074568271637, "step": 9999 }, { "epoch": 12.269938650306749, "grad_norm": 0.22136932611465454, "learning_rate": 1.747437733671143e-05, "loss": 0.5619195103645325, "step": 10000 }, { "epoch": 12.271165644171779, "grad_norm": 0.3050231337547302, "learning_rate": 1.746959113569373e-05, "loss": 0.5835624933242798, "step": 10001 }, { "epoch": 12.27239263803681, "grad_norm": 0.24656987190246582, "learning_rate": 1.7464805238205343e-05, "loss": 0.5793216228485107, "step": 10002 }, { "epoch": 12.273619631901841, "grad_norm": 0.2976642847061157, "learning_rate": 1.7460019644439157e-05, "loss": 0.7719542384147644, "step": 10003 }, { "epoch": 12.274846625766871, "grad_norm": 0.3059670329093933, "learning_rate": 1.7455234354588078e-05, "loss": 0.487260639667511, "step": 10004 }, { "epoch": 12.276073619631902, "grad_norm": 0.23547787964344025, "learning_rate": 1.745044936884499e-05, "loss": 0.7340688705444336, "step": 10005 }, { "epoch": 12.277300613496932, "grad_norm": 0.2648816406726837, "learning_rate": 1.7445664687402747e-05, "loss": 0.8888133764266968, "step": 10006 }, { "epoch": 12.278527607361964, "grad_norm": 0.2959928810596466, "learning_rate": 1.744088031045422e-05, "loss": 0.47579583525657654, "step": 10007 }, { "epoch": 12.279754601226994, "grad_norm": 0.2884868383407593, "learning_rate": 1.7436096238192244e-05, "loss": 0.5589560866355896, "step": 10008 }, { "epoch": 12.280981595092024, "grad_norm": 0.2398352026939392, "learning_rate": 1.7431312470809657e-05, "loss": 0.6574122905731201, "step": 10009 }, { "epoch": 12.282208588957054, "grad_norm": 0.3332534432411194, "learning_rate": 1.742652900849928e-05, "loss": 0.6391705274581909, "step": 10010 }, { "epoch": 12.283435582822086, "grad_norm": 0.2660227119922638, "learning_rate": 1.742174585145392e-05, "loss": 0.6295198202133179, "step": 10011 }, { "epoch": 12.284662576687117, "grad_norm": 0.27083224058151245, "learning_rate": 1.741696299986637e-05, "loss": 0.5299762487411499, "step": 10012 }, { "epoch": 12.285889570552147, "grad_norm": 0.23847423493862152, "learning_rate": 1.741218045392941e-05, "loss": 0.4747109115123749, "step": 10013 }, { "epoch": 12.287116564417177, "grad_norm": 0.32155799865722656, "learning_rate": 1.7407398213835815e-05, "loss": 0.6860710382461548, "step": 10014 }, { "epoch": 12.28834355828221, "grad_norm": 0.30711331963539124, "learning_rate": 1.740261627977835e-05, "loss": 0.4312334954738617, "step": 10015 }, { "epoch": 12.28957055214724, "grad_norm": 0.21175429224967957, "learning_rate": 1.7397834651949744e-05, "loss": 0.596039354801178, "step": 10016 }, { "epoch": 12.29079754601227, "grad_norm": 0.3067531883716583, "learning_rate": 1.7393053330542752e-05, "loss": 0.46465015411376953, "step": 10017 }, { "epoch": 12.2920245398773, "grad_norm": 0.2710665166378021, "learning_rate": 1.738827231575008e-05, "loss": 0.6010189056396484, "step": 10018 }, { "epoch": 12.293251533742332, "grad_norm": 0.2772218585014343, "learning_rate": 1.7383491607764438e-05, "loss": 0.5639665722846985, "step": 10019 }, { "epoch": 12.294478527607362, "grad_norm": 0.26887044310569763, "learning_rate": 1.737871120677853e-05, "loss": 0.641283392906189, "step": 10020 }, { "epoch": 12.295705521472392, "grad_norm": 0.2298635095357895, "learning_rate": 1.737393111298504e-05, "loss": 0.710869312286377, "step": 10021 }, { "epoch": 12.296932515337422, "grad_norm": 0.3569815456867218, "learning_rate": 1.736915132657662e-05, "loss": 0.48418641090393066, "step": 10022 }, { "epoch": 12.298159509202454, "grad_norm": 0.2583913505077362, "learning_rate": 1.736437184774596e-05, "loss": 0.7210261225700378, "step": 10023 }, { "epoch": 12.299386503067485, "grad_norm": 0.29425525665283203, "learning_rate": 1.7359592676685694e-05, "loss": 0.7436380982398987, "step": 10024 }, { "epoch": 12.300613496932515, "grad_norm": 0.25857943296432495, "learning_rate": 1.735481381358845e-05, "loss": 0.6298733949661255, "step": 10025 }, { "epoch": 12.301840490797545, "grad_norm": 0.2569587230682373, "learning_rate": 1.7350035258646864e-05, "loss": 0.7072460055351257, "step": 10026 }, { "epoch": 12.303067484662577, "grad_norm": 0.31340375542640686, "learning_rate": 1.7345257012053533e-05, "loss": 0.6116364002227783, "step": 10027 }, { "epoch": 12.304294478527607, "grad_norm": 0.3257131576538086, "learning_rate": 1.734047907400106e-05, "loss": 0.6585169434547424, "step": 10028 }, { "epoch": 12.305521472392638, "grad_norm": 0.22564077377319336, "learning_rate": 1.7335701444682035e-05, "loss": 0.5692448616027832, "step": 10029 }, { "epoch": 12.30674846625767, "grad_norm": 0.251862496137619, "learning_rate": 1.7330924124289023e-05, "loss": 0.643424928188324, "step": 10030 }, { "epoch": 12.3079754601227, "grad_norm": 0.2787356674671173, "learning_rate": 1.7326147113014587e-05, "loss": 0.47409847378730774, "step": 10031 }, { "epoch": 12.30920245398773, "grad_norm": 0.2461739182472229, "learning_rate": 1.732137041105127e-05, "loss": 0.6976724863052368, "step": 10032 }, { "epoch": 12.31042944785276, "grad_norm": 0.25633251667022705, "learning_rate": 1.7316594018591614e-05, "loss": 0.6823711395263672, "step": 10033 }, { "epoch": 12.31165644171779, "grad_norm": 0.24115873873233795, "learning_rate": 1.7311817935828145e-05, "loss": 0.5766950845718384, "step": 10034 }, { "epoch": 12.312883435582823, "grad_norm": 0.30713725090026855, "learning_rate": 1.7307042162953358e-05, "loss": 0.48067155480384827, "step": 10035 }, { "epoch": 12.314110429447853, "grad_norm": 0.24836324155330658, "learning_rate": 1.730226670015977e-05, "loss": 0.5812584757804871, "step": 10036 }, { "epoch": 12.315337423312883, "grad_norm": 0.2624185383319855, "learning_rate": 1.729749154763985e-05, "loss": 0.7879451513290405, "step": 10037 }, { "epoch": 12.316564417177915, "grad_norm": 0.28967076539993286, "learning_rate": 1.7292716705586077e-05, "loss": 0.5777398347854614, "step": 10038 }, { "epoch": 12.317791411042945, "grad_norm": 0.28503361344337463, "learning_rate": 1.728794217419092e-05, "loss": 0.6408138871192932, "step": 10039 }, { "epoch": 12.319018404907975, "grad_norm": 0.22787682712078094, "learning_rate": 1.7283167953646806e-05, "loss": 0.5569846630096436, "step": 10040 }, { "epoch": 12.320245398773006, "grad_norm": 0.27789124846458435, "learning_rate": 1.7278394044146196e-05, "loss": 0.4771428406238556, "step": 10041 }, { "epoch": 12.321472392638038, "grad_norm": 0.26638293266296387, "learning_rate": 1.727362044588149e-05, "loss": 0.6097725629806519, "step": 10042 }, { "epoch": 12.322699386503068, "grad_norm": 0.25942596793174744, "learning_rate": 1.7268847159045108e-05, "loss": 0.4238957166671753, "step": 10043 }, { "epoch": 12.323926380368098, "grad_norm": 0.30626004934310913, "learning_rate": 1.726407418382945e-05, "loss": 0.6355315446853638, "step": 10044 }, { "epoch": 12.325153374233128, "grad_norm": 0.24238361418247223, "learning_rate": 1.725930152042689e-05, "loss": 0.7633342742919922, "step": 10045 }, { "epoch": 12.32638036809816, "grad_norm": 0.23627936840057373, "learning_rate": 1.7254529169029814e-05, "loss": 0.6536461114883423, "step": 10046 }, { "epoch": 12.32760736196319, "grad_norm": 0.2916072607040405, "learning_rate": 1.724975712983058e-05, "loss": 0.5967239737510681, "step": 10047 }, { "epoch": 12.32883435582822, "grad_norm": 0.253812313079834, "learning_rate": 1.724498540302153e-05, "loss": 0.8347567319869995, "step": 10048 }, { "epoch": 12.330061349693251, "grad_norm": 0.28322699666023254, "learning_rate": 1.7240213988795e-05, "loss": 0.5792571902275085, "step": 10049 }, { "epoch": 12.331288343558283, "grad_norm": 0.28014320135116577, "learning_rate": 1.7235442887343312e-05, "loss": 0.6393141746520996, "step": 10050 }, { "epoch": 12.332515337423313, "grad_norm": 0.2946663200855255, "learning_rate": 1.723067209885878e-05, "loss": 0.6043229699134827, "step": 10051 }, { "epoch": 12.333742331288343, "grad_norm": 0.38551151752471924, "learning_rate": 1.7225901623533692e-05, "loss": 0.5473358631134033, "step": 10052 }, { "epoch": 12.334969325153374, "grad_norm": 0.2745901942253113, "learning_rate": 1.722113146156034e-05, "loss": 0.796230673789978, "step": 10053 }, { "epoch": 12.336196319018406, "grad_norm": 0.3094680905342102, "learning_rate": 1.7216361613130992e-05, "loss": 0.4497890770435333, "step": 10054 }, { "epoch": 12.337423312883436, "grad_norm": 0.3074202239513397, "learning_rate": 1.7211592078437905e-05, "loss": 0.7315910458564758, "step": 10055 }, { "epoch": 12.338650306748466, "grad_norm": 0.2810259759426117, "learning_rate": 1.7206822857673334e-05, "loss": 0.7358645796775818, "step": 10056 }, { "epoch": 12.339877300613496, "grad_norm": 0.30896803736686707, "learning_rate": 1.72020539510295e-05, "loss": 0.43205952644348145, "step": 10057 }, { "epoch": 12.341104294478528, "grad_norm": 0.29274845123291016, "learning_rate": 1.7197285358698637e-05, "loss": 0.7068111896514893, "step": 10058 }, { "epoch": 12.342331288343559, "grad_norm": 0.24356241524219513, "learning_rate": 1.7192517080872946e-05, "loss": 0.5460497736930847, "step": 10059 }, { "epoch": 12.343558282208589, "grad_norm": 0.2978205680847168, "learning_rate": 1.7187749117744616e-05, "loss": 0.6695431470870972, "step": 10060 }, { "epoch": 12.344785276073619, "grad_norm": 0.25731781125068665, "learning_rate": 1.718298146950585e-05, "loss": 0.5482520461082458, "step": 10061 }, { "epoch": 12.346012269938651, "grad_norm": 0.3450252413749695, "learning_rate": 1.7178214136348797e-05, "loss": 0.5510416626930237, "step": 10062 }, { "epoch": 12.347239263803681, "grad_norm": 0.29468995332717896, "learning_rate": 1.717344711846563e-05, "loss": 0.6985501646995544, "step": 10063 }, { "epoch": 12.348466257668711, "grad_norm": 0.25205889344215393, "learning_rate": 1.716868041604848e-05, "loss": 0.7888593673706055, "step": 10064 }, { "epoch": 12.349693251533742, "grad_norm": 0.3454911410808563, "learning_rate": 1.716391402928949e-05, "loss": 0.5749049186706543, "step": 10065 }, { "epoch": 12.350920245398774, "grad_norm": 0.3614708483219147, "learning_rate": 1.715914795838078e-05, "loss": 0.5626084804534912, "step": 10066 }, { "epoch": 12.352147239263804, "grad_norm": 0.2706715166568756, "learning_rate": 1.7154382203514445e-05, "loss": 0.8519998788833618, "step": 10067 }, { "epoch": 12.353374233128834, "grad_norm": 0.27504852414131165, "learning_rate": 1.714961676488259e-05, "loss": 0.4054766297340393, "step": 10068 }, { "epoch": 12.354601226993864, "grad_norm": 0.2773872911930084, "learning_rate": 1.7144851642677292e-05, "loss": 0.6136990189552307, "step": 10069 }, { "epoch": 12.355828220858896, "grad_norm": 0.27803274989128113, "learning_rate": 1.714008683709063e-05, "loss": 0.7541166543960571, "step": 10070 }, { "epoch": 12.357055214723927, "grad_norm": 0.23563876748085022, "learning_rate": 1.713532234831464e-05, "loss": 0.5934473276138306, "step": 10071 }, { "epoch": 12.358282208588957, "grad_norm": 0.2596365511417389, "learning_rate": 1.7130558176541385e-05, "loss": 0.5578148365020752, "step": 10072 }, { "epoch": 12.359509202453987, "grad_norm": 0.2857857644557953, "learning_rate": 1.712579432196288e-05, "loss": 0.6270753145217896, "step": 10073 }, { "epoch": 12.360736196319019, "grad_norm": 0.23598481714725494, "learning_rate": 1.7121030784771143e-05, "loss": 0.6845297813415527, "step": 10074 }, { "epoch": 12.36196319018405, "grad_norm": 0.2769327461719513, "learning_rate": 1.7116267565158194e-05, "loss": 0.7766731381416321, "step": 10075 }, { "epoch": 12.36319018404908, "grad_norm": 0.2567909061908722, "learning_rate": 1.711150466331601e-05, "loss": 0.7132014036178589, "step": 10076 }, { "epoch": 12.36441717791411, "grad_norm": 0.26713502407073975, "learning_rate": 1.7106742079436574e-05, "loss": 0.8536490201950073, "step": 10077 }, { "epoch": 12.365644171779142, "grad_norm": 0.2752772867679596, "learning_rate": 1.710197981371185e-05, "loss": 0.5284761190414429, "step": 10078 }, { "epoch": 12.366871165644172, "grad_norm": 0.28873127698898315, "learning_rate": 1.7097217866333795e-05, "loss": 0.5977211594581604, "step": 10079 }, { "epoch": 12.368098159509202, "grad_norm": 0.2378893345594406, "learning_rate": 1.7092456237494353e-05, "loss": 0.8004148006439209, "step": 10080 }, { "epoch": 12.369325153374232, "grad_norm": 0.25965213775634766, "learning_rate": 1.7087694927385443e-05, "loss": 0.7855523824691772, "step": 10081 }, { "epoch": 12.370552147239264, "grad_norm": 0.30494073033332825, "learning_rate": 1.7082933936198986e-05, "loss": 0.5144152045249939, "step": 10082 }, { "epoch": 12.371779141104295, "grad_norm": 0.3070131838321686, "learning_rate": 1.707817326412688e-05, "loss": 0.5612358450889587, "step": 10083 }, { "epoch": 12.373006134969325, "grad_norm": 0.27728891372680664, "learning_rate": 1.7073412911361015e-05, "loss": 0.5431454181671143, "step": 10084 }, { "epoch": 12.374233128834355, "grad_norm": 0.23124246299266815, "learning_rate": 1.7068652878093273e-05, "loss": 0.5852501392364502, "step": 10085 }, { "epoch": 12.375460122699387, "grad_norm": 0.27056893706321716, "learning_rate": 1.7063893164515505e-05, "loss": 0.6899493932723999, "step": 10086 }, { "epoch": 12.376687116564417, "grad_norm": 0.26833590865135193, "learning_rate": 1.7059133770819573e-05, "loss": 0.5608951449394226, "step": 10087 }, { "epoch": 12.377914110429447, "grad_norm": 0.2656225264072418, "learning_rate": 1.705437469719731e-05, "loss": 0.5990464687347412, "step": 10088 }, { "epoch": 12.379141104294478, "grad_norm": 0.2307528555393219, "learning_rate": 1.7049615943840537e-05, "loss": 0.5044652223587036, "step": 10089 }, { "epoch": 12.38036809815951, "grad_norm": 0.25538429617881775, "learning_rate": 1.7044857510941074e-05, "loss": 0.6193141937255859, "step": 10090 }, { "epoch": 12.38159509202454, "grad_norm": 0.2969212234020233, "learning_rate": 1.7040099398690718e-05, "loss": 0.3277524411678314, "step": 10091 }, { "epoch": 12.38282208588957, "grad_norm": 0.31009790301322937, "learning_rate": 1.7035341607281252e-05, "loss": 0.5909140110015869, "step": 10092 }, { "epoch": 12.3840490797546, "grad_norm": 0.27253544330596924, "learning_rate": 1.7030584136904447e-05, "loss": 0.5318449139595032, "step": 10093 }, { "epoch": 12.385276073619632, "grad_norm": 0.30259931087493896, "learning_rate": 1.7025826987752072e-05, "loss": 0.6359719038009644, "step": 10094 }, { "epoch": 12.386503067484663, "grad_norm": 0.2834674119949341, "learning_rate": 1.702107016001586e-05, "loss": 0.6112684607505798, "step": 10095 }, { "epoch": 12.387730061349693, "grad_norm": 0.3139294385910034, "learning_rate": 1.7016313653887565e-05, "loss": 0.6807054281234741, "step": 10096 }, { "epoch": 12.388957055214725, "grad_norm": 0.2733268737792969, "learning_rate": 1.7011557469558888e-05, "loss": 0.4122052788734436, "step": 10097 }, { "epoch": 12.390184049079755, "grad_norm": 0.2758947014808655, "learning_rate": 1.7006801607221546e-05, "loss": 0.4684387445449829, "step": 10098 }, { "epoch": 12.391411042944785, "grad_norm": 0.2711677551269531, "learning_rate": 1.700204606706724e-05, "loss": 0.5060458183288574, "step": 10099 }, { "epoch": 12.392638036809815, "grad_norm": 0.2891363799571991, "learning_rate": 1.6997290849287637e-05, "loss": 0.487071692943573, "step": 10100 }, { "epoch": 12.393865030674847, "grad_norm": 0.3028061091899872, "learning_rate": 1.6992535954074418e-05, "loss": 0.5479363799095154, "step": 10101 }, { "epoch": 12.395092024539878, "grad_norm": 0.2689127027988434, "learning_rate": 1.6987781381619237e-05, "loss": 0.5063247084617615, "step": 10102 }, { "epoch": 12.396319018404908, "grad_norm": 0.30449843406677246, "learning_rate": 1.698302713211374e-05, "loss": 0.6038601398468018, "step": 10103 }, { "epoch": 12.397546012269938, "grad_norm": 0.2668047547340393, "learning_rate": 1.6978273205749557e-05, "loss": 0.6640708446502686, "step": 10104 }, { "epoch": 12.39877300613497, "grad_norm": 0.2627141773700714, "learning_rate": 1.6973519602718296e-05, "loss": 0.6626731157302856, "step": 10105 }, { "epoch": 12.4, "grad_norm": 0.2466660887002945, "learning_rate": 1.6968766323211567e-05, "loss": 0.7888202667236328, "step": 10106 }, { "epoch": 12.40122699386503, "grad_norm": 0.2546045780181885, "learning_rate": 1.6964013367420966e-05, "loss": 0.7488671541213989, "step": 10107 }, { "epoch": 12.40245398773006, "grad_norm": 0.29014673829078674, "learning_rate": 1.6959260735538064e-05, "loss": 0.5262154936790466, "step": 10108 }, { "epoch": 12.403680981595093, "grad_norm": 0.31530115008354187, "learning_rate": 1.6954508427754434e-05, "loss": 0.5635057687759399, "step": 10109 }, { "epoch": 12.404907975460123, "grad_norm": 0.32960712909698486, "learning_rate": 1.6949756444261616e-05, "loss": 0.6472989320755005, "step": 10110 }, { "epoch": 12.406134969325153, "grad_norm": 0.3179148733615875, "learning_rate": 1.6945004785251152e-05, "loss": 0.5511329770088196, "step": 10111 }, { "epoch": 12.407361963190183, "grad_norm": 0.26756390929222107, "learning_rate": 1.6940253450914573e-05, "loss": 0.6309853792190552, "step": 10112 }, { "epoch": 12.408588957055215, "grad_norm": 0.2535991370677948, "learning_rate": 1.69355024414434e-05, "loss": 0.7322797775268555, "step": 10113 }, { "epoch": 12.409815950920246, "grad_norm": 0.23701563477516174, "learning_rate": 1.6930751757029117e-05, "loss": 0.7708555459976196, "step": 10114 }, { "epoch": 12.411042944785276, "grad_norm": 0.2624150812625885, "learning_rate": 1.6926001397863213e-05, "loss": 0.7156323194503784, "step": 10115 }, { "epoch": 12.412269938650306, "grad_norm": 0.3284042179584503, "learning_rate": 1.6921251364137176e-05, "loss": 0.9360648393630981, "step": 10116 }, { "epoch": 12.413496932515338, "grad_norm": 0.2761356830596924, "learning_rate": 1.6916501656042443e-05, "loss": 0.6747140884399414, "step": 10117 }, { "epoch": 12.414723926380368, "grad_norm": 0.2902049124240875, "learning_rate": 1.6911752273770483e-05, "loss": 0.5896989703178406, "step": 10118 }, { "epoch": 12.415950920245399, "grad_norm": 0.24704918265342712, "learning_rate": 1.690700321751272e-05, "loss": 0.5552791357040405, "step": 10119 }, { "epoch": 12.417177914110429, "grad_norm": 0.28739026188850403, "learning_rate": 1.6902254487460566e-05, "loss": 0.5635565519332886, "step": 10120 }, { "epoch": 12.41840490797546, "grad_norm": 0.41730839014053345, "learning_rate": 1.689750608380545e-05, "loss": 0.5042228102684021, "step": 10121 }, { "epoch": 12.419631901840491, "grad_norm": 0.3078708350658417, "learning_rate": 1.6892758006738747e-05, "loss": 0.7477552890777588, "step": 10122 }, { "epoch": 12.420858895705521, "grad_norm": 0.26533347368240356, "learning_rate": 1.6888010256451848e-05, "loss": 0.5437774658203125, "step": 10123 }, { "epoch": 12.422085889570551, "grad_norm": 0.34176450967788696, "learning_rate": 1.688326283313612e-05, "loss": 0.39702045917510986, "step": 10124 }, { "epoch": 12.423312883435583, "grad_norm": 0.2578549087047577, "learning_rate": 1.6878515736982915e-05, "loss": 0.49783194065093994, "step": 10125 }, { "epoch": 12.424539877300614, "grad_norm": 0.3213890790939331, "learning_rate": 1.687376896818358e-05, "loss": 0.5345302224159241, "step": 10126 }, { "epoch": 12.425766871165644, "grad_norm": 0.2240833342075348, "learning_rate": 1.686902252692944e-05, "loss": 0.737379789352417, "step": 10127 }, { "epoch": 12.426993865030674, "grad_norm": 0.27287575602531433, "learning_rate": 1.6864276413411813e-05, "loss": 0.6177191734313965, "step": 10128 }, { "epoch": 12.428220858895706, "grad_norm": 0.25661808252334595, "learning_rate": 1.6859530627822e-05, "loss": 0.635608971118927, "step": 10129 }, { "epoch": 12.429447852760736, "grad_norm": 0.29704010486602783, "learning_rate": 1.6854785170351284e-05, "loss": 0.6318725347518921, "step": 10130 }, { "epoch": 12.430674846625767, "grad_norm": 0.25522860884666443, "learning_rate": 1.6850040041190957e-05, "loss": 0.7168127298355103, "step": 10131 }, { "epoch": 12.431901840490797, "grad_norm": 0.3152511715888977, "learning_rate": 1.6845295240532267e-05, "loss": 0.4893918037414551, "step": 10132 }, { "epoch": 12.433128834355829, "grad_norm": 0.3175921142101288, "learning_rate": 1.6840550768566457e-05, "loss": 0.5987886190414429, "step": 10133 }, { "epoch": 12.434355828220859, "grad_norm": 0.2758758068084717, "learning_rate": 1.6835806625484784e-05, "loss": 0.727737545967102, "step": 10134 }, { "epoch": 12.43558282208589, "grad_norm": 0.2272300273180008, "learning_rate": 1.6831062811478466e-05, "loss": 0.5489065647125244, "step": 10135 }, { "epoch": 12.43680981595092, "grad_norm": 0.24528847634792328, "learning_rate": 1.6826319326738698e-05, "loss": 0.76882004737854, "step": 10136 }, { "epoch": 12.438036809815952, "grad_norm": 0.22743985056877136, "learning_rate": 1.6821576171456693e-05, "loss": 0.4894009530544281, "step": 10137 }, { "epoch": 12.439263803680982, "grad_norm": 0.2922298014163971, "learning_rate": 1.6816833345823625e-05, "loss": 0.4836640954017639, "step": 10138 }, { "epoch": 12.440490797546012, "grad_norm": 0.29893729090690613, "learning_rate": 1.6812090850030662e-05, "loss": 0.5843310356140137, "step": 10139 }, { "epoch": 12.441717791411042, "grad_norm": 0.23246198892593384, "learning_rate": 1.6807348684268972e-05, "loss": 0.5430499911308289, "step": 10140 }, { "epoch": 12.442944785276074, "grad_norm": 0.2337871938943863, "learning_rate": 1.6802606848729683e-05, "loss": 0.6986418962478638, "step": 10141 }, { "epoch": 12.444171779141104, "grad_norm": 0.27920040488243103, "learning_rate": 1.679786534360394e-05, "loss": 0.6143480539321899, "step": 10142 }, { "epoch": 12.445398773006135, "grad_norm": 0.3022783100605011, "learning_rate": 1.6793124169082842e-05, "loss": 0.41694533824920654, "step": 10143 }, { "epoch": 12.446625766871165, "grad_norm": 0.2429829090833664, "learning_rate": 1.678838332535751e-05, "loss": 0.6223881840705872, "step": 10144 }, { "epoch": 12.447852760736197, "grad_norm": 0.3284783661365509, "learning_rate": 1.6783642812619023e-05, "loss": 0.5721602439880371, "step": 10145 }, { "epoch": 12.449079754601227, "grad_norm": 0.287800133228302, "learning_rate": 1.6778902631058457e-05, "loss": 0.5060799717903137, "step": 10146 }, { "epoch": 12.450306748466257, "grad_norm": 0.28350773453712463, "learning_rate": 1.6774162780866886e-05, "loss": 0.6968005895614624, "step": 10147 }, { "epoch": 12.451533742331288, "grad_norm": 0.3153511583805084, "learning_rate": 1.6769423262235348e-05, "loss": 0.7665244936943054, "step": 10148 }, { "epoch": 12.45276073619632, "grad_norm": 0.27871423959732056, "learning_rate": 1.6764684075354882e-05, "loss": 0.5585576891899109, "step": 10149 }, { "epoch": 12.45398773006135, "grad_norm": 0.24953338503837585, "learning_rate": 1.6759945220416518e-05, "loss": 0.7172621488571167, "step": 10150 }, { "epoch": 12.45521472392638, "grad_norm": 0.26069509983062744, "learning_rate": 1.6755206697611258e-05, "loss": 0.6488775014877319, "step": 10151 }, { "epoch": 12.45644171779141, "grad_norm": 0.27753397822380066, "learning_rate": 1.67504685071301e-05, "loss": 0.4123743772506714, "step": 10152 }, { "epoch": 12.457668711656442, "grad_norm": 0.31305116415023804, "learning_rate": 1.674573064916403e-05, "loss": 0.5313441753387451, "step": 10153 }, { "epoch": 12.458895705521472, "grad_norm": 0.32661905884742737, "learning_rate": 1.6740993123904015e-05, "loss": 0.4823383092880249, "step": 10154 }, { "epoch": 12.460122699386503, "grad_norm": 0.23956353962421417, "learning_rate": 1.6736255931541016e-05, "loss": 0.7102557420730591, "step": 10155 }, { "epoch": 12.461349693251535, "grad_norm": 0.2795650064945221, "learning_rate": 1.6731519072265957e-05, "loss": 0.5781627893447876, "step": 10156 }, { "epoch": 12.462576687116565, "grad_norm": 0.34114524722099304, "learning_rate": 1.6726782546269794e-05, "loss": 0.5397638082504272, "step": 10157 }, { "epoch": 12.463803680981595, "grad_norm": 0.32271674275398254, "learning_rate": 1.6722046353743425e-05, "loss": 0.6231786012649536, "step": 10158 }, { "epoch": 12.465030674846625, "grad_norm": 0.3115490674972534, "learning_rate": 1.6717310494877763e-05, "loss": 0.7224016189575195, "step": 10159 }, { "epoch": 12.466257668711656, "grad_norm": 0.3431662619113922, "learning_rate": 1.6712574969863694e-05, "loss": 0.6989564895629883, "step": 10160 }, { "epoch": 12.467484662576688, "grad_norm": 0.3260856568813324, "learning_rate": 1.670783977889208e-05, "loss": 0.5158926844596863, "step": 10161 }, { "epoch": 12.468711656441718, "grad_norm": 0.26906198263168335, "learning_rate": 1.670310492215381e-05, "loss": 0.5031999349594116, "step": 10162 }, { "epoch": 12.469938650306748, "grad_norm": 0.2937507927417755, "learning_rate": 1.6698370399839708e-05, "loss": 0.5551905632019043, "step": 10163 }, { "epoch": 12.47116564417178, "grad_norm": 0.2955954074859619, "learning_rate": 1.6693636212140622e-05, "loss": 0.5093910098075867, "step": 10164 }, { "epoch": 12.47239263803681, "grad_norm": 0.21276377141475677, "learning_rate": 1.6688902359247365e-05, "loss": 0.5500988364219666, "step": 10165 }, { "epoch": 12.47361963190184, "grad_norm": 0.2692640423774719, "learning_rate": 1.668416884135075e-05, "loss": 0.4423633813858032, "step": 10166 }, { "epoch": 12.47484662576687, "grad_norm": 0.27353137731552124, "learning_rate": 1.667943565864158e-05, "loss": 0.570559024810791, "step": 10167 }, { "epoch": 12.476073619631903, "grad_norm": 0.26472708582878113, "learning_rate": 1.667470281131062e-05, "loss": 0.5356417298316956, "step": 10168 }, { "epoch": 12.477300613496933, "grad_norm": 0.27241379022598267, "learning_rate": 1.6669970299548647e-05, "loss": 0.6893892288208008, "step": 10169 }, { "epoch": 12.478527607361963, "grad_norm": 0.28927284479141235, "learning_rate": 1.6665238123546415e-05, "loss": 0.792728066444397, "step": 10170 }, { "epoch": 12.479754601226993, "grad_norm": 0.24793633818626404, "learning_rate": 1.666050628349466e-05, "loss": 0.5297935009002686, "step": 10171 }, { "epoch": 12.480981595092025, "grad_norm": 0.2883059084415436, "learning_rate": 1.6655774779584116e-05, "loss": 0.5296998023986816, "step": 10172 }, { "epoch": 12.482208588957056, "grad_norm": 0.25397130846977234, "learning_rate": 1.6651043612005483e-05, "loss": 0.8590284585952759, "step": 10173 }, { "epoch": 12.483435582822086, "grad_norm": 0.2511109709739685, "learning_rate": 1.664631278094948e-05, "loss": 0.5725734233856201, "step": 10174 }, { "epoch": 12.484662576687116, "grad_norm": 0.26992708444595337, "learning_rate": 1.664158228660678e-05, "loss": 0.6012997627258301, "step": 10175 }, { "epoch": 12.485889570552148, "grad_norm": 0.28267863392829895, "learning_rate": 1.6636852129168057e-05, "loss": 0.6767833232879639, "step": 10176 }, { "epoch": 12.487116564417178, "grad_norm": 0.23285169899463654, "learning_rate": 1.6632122308823974e-05, "loss": 0.6801580190658569, "step": 10177 }, { "epoch": 12.488343558282208, "grad_norm": 0.3010447025299072, "learning_rate": 1.662739282576517e-05, "loss": 0.45633918046951294, "step": 10178 }, { "epoch": 12.489570552147239, "grad_norm": 0.2841428816318512, "learning_rate": 1.662266368018228e-05, "loss": 0.7147191762924194, "step": 10179 }, { "epoch": 12.49079754601227, "grad_norm": 0.23274552822113037, "learning_rate": 1.6617934872265932e-05, "loss": 0.7287735939025879, "step": 10180 }, { "epoch": 12.4920245398773, "grad_norm": 0.28285330533981323, "learning_rate": 1.6613206402206722e-05, "loss": 0.48897784948349, "step": 10181 }, { "epoch": 12.493251533742331, "grad_norm": 0.28603431582450867, "learning_rate": 1.660847827019524e-05, "loss": 0.49159735441207886, "step": 10182 }, { "epoch": 12.494478527607361, "grad_norm": 0.2466898262500763, "learning_rate": 1.660375047642207e-05, "loss": 0.704745888710022, "step": 10183 }, { "epoch": 12.495705521472393, "grad_norm": 0.25442060828208923, "learning_rate": 1.6599023021077765e-05, "loss": 0.7934205532073975, "step": 10184 }, { "epoch": 12.496932515337424, "grad_norm": 0.32823923230171204, "learning_rate": 1.6594295904352884e-05, "loss": 0.6877128481864929, "step": 10185 }, { "epoch": 12.498159509202454, "grad_norm": 0.25810161232948303, "learning_rate": 1.6589569126437966e-05, "loss": 0.6395283937454224, "step": 10186 }, { "epoch": 12.499386503067484, "grad_norm": 0.24378056824207306, "learning_rate": 1.6584842687523523e-05, "loss": 0.7162209749221802, "step": 10187 }, { "epoch": 12.500613496932516, "grad_norm": 0.2386993169784546, "learning_rate": 1.6580116587800077e-05, "loss": 0.714419424533844, "step": 10188 }, { "epoch": 12.501840490797546, "grad_norm": 0.3186410963535309, "learning_rate": 1.657539082745811e-05, "loss": 0.5807203054428101, "step": 10189 }, { "epoch": 12.503067484662576, "grad_norm": 0.3406020700931549, "learning_rate": 1.6570665406688112e-05, "loss": 0.3627232611179352, "step": 10190 }, { "epoch": 12.504294478527607, "grad_norm": 0.27394214272499084, "learning_rate": 1.6565940325680556e-05, "loss": 0.5129978060722351, "step": 10191 }, { "epoch": 12.505521472392639, "grad_norm": 0.2738613784313202, "learning_rate": 1.6561215584625885e-05, "loss": 0.6474271416664124, "step": 10192 }, { "epoch": 12.506748466257669, "grad_norm": 0.2632056474685669, "learning_rate": 1.655649118371455e-05, "loss": 0.6258596777915955, "step": 10193 }, { "epoch": 12.5079754601227, "grad_norm": 0.30533695220947266, "learning_rate": 1.655176712313697e-05, "loss": 0.7443661093711853, "step": 10194 }, { "epoch": 12.50920245398773, "grad_norm": 0.26004549860954285, "learning_rate": 1.6547043403083562e-05, "loss": 0.5342870950698853, "step": 10195 }, { "epoch": 12.510429447852761, "grad_norm": 0.26915237307548523, "learning_rate": 1.6542320023744733e-05, "loss": 0.4794132709503174, "step": 10196 }, { "epoch": 12.511656441717792, "grad_norm": 0.24784734845161438, "learning_rate": 1.653759698531085e-05, "loss": 0.4381338357925415, "step": 10197 }, { "epoch": 12.512883435582822, "grad_norm": 0.23187582194805145, "learning_rate": 1.6532874287972308e-05, "loss": 0.8085309267044067, "step": 10198 }, { "epoch": 12.514110429447852, "grad_norm": 0.2699267268180847, "learning_rate": 1.652815193191945e-05, "loss": 0.6427614092826843, "step": 10199 }, { "epoch": 12.515337423312884, "grad_norm": 0.22435663640499115, "learning_rate": 1.6523429917342616e-05, "loss": 0.6234909892082214, "step": 10200 }, { "epoch": 12.516564417177914, "grad_norm": 0.31949323415756226, "learning_rate": 1.651870824443215e-05, "loss": 0.7457947134971619, "step": 10201 }, { "epoch": 12.517791411042944, "grad_norm": 0.25527429580688477, "learning_rate": 1.6513986913378375e-05, "loss": 0.6196423768997192, "step": 10202 }, { "epoch": 12.519018404907975, "grad_norm": 0.25060465931892395, "learning_rate": 1.650926592437158e-05, "loss": 0.5255914926528931, "step": 10203 }, { "epoch": 12.520245398773007, "grad_norm": 0.25395703315734863, "learning_rate": 1.6504545277602056e-05, "loss": 0.6948992013931274, "step": 10204 }, { "epoch": 12.521472392638037, "grad_norm": 0.24280588328838348, "learning_rate": 1.6499824973260087e-05, "loss": 0.5963149666786194, "step": 10205 }, { "epoch": 12.522699386503067, "grad_norm": 0.2760908007621765, "learning_rate": 1.6495105011535926e-05, "loss": 0.7547276616096497, "step": 10206 }, { "epoch": 12.523926380368097, "grad_norm": 0.2577328085899353, "learning_rate": 1.6490385392619828e-05, "loss": 0.5669792890548706, "step": 10207 }, { "epoch": 12.52515337423313, "grad_norm": 0.24297110736370087, "learning_rate": 1.6485666116702025e-05, "loss": 0.4954614043235779, "step": 10208 }, { "epoch": 12.52638036809816, "grad_norm": 0.2597048282623291, "learning_rate": 1.6480947183972732e-05, "loss": 0.5572801232337952, "step": 10209 }, { "epoch": 12.52760736196319, "grad_norm": 0.2869231104850769, "learning_rate": 1.6476228594622174e-05, "loss": 0.5931028723716736, "step": 10210 }, { "epoch": 12.52883435582822, "grad_norm": 0.30259713530540466, "learning_rate": 1.6471510348840518e-05, "loss": 0.7201608419418335, "step": 10211 }, { "epoch": 12.530061349693252, "grad_norm": 0.317692369222641, "learning_rate": 1.6466792446817958e-05, "loss": 0.5756022930145264, "step": 10212 }, { "epoch": 12.531288343558282, "grad_norm": 0.3172726631164551, "learning_rate": 1.646207488874466e-05, "loss": 0.6693395972251892, "step": 10213 }, { "epoch": 12.532515337423312, "grad_norm": 0.3265940546989441, "learning_rate": 1.6457357674810768e-05, "loss": 0.736808717250824, "step": 10214 }, { "epoch": 12.533742331288344, "grad_norm": 0.32763025164604187, "learning_rate": 1.645264080520643e-05, "loss": 0.5954992771148682, "step": 10215 }, { "epoch": 12.534969325153375, "grad_norm": 0.2552138566970825, "learning_rate": 1.6447924280121762e-05, "loss": 0.6088007688522339, "step": 10216 }, { "epoch": 12.536196319018405, "grad_norm": 0.265067994594574, "learning_rate": 1.644320809974687e-05, "loss": 0.8160301446914673, "step": 10217 }, { "epoch": 12.537423312883435, "grad_norm": 0.3055335283279419, "learning_rate": 1.6438492264271864e-05, "loss": 0.5465957522392273, "step": 10218 }, { "epoch": 12.538650306748465, "grad_norm": 0.2854231894016266, "learning_rate": 1.6433776773886807e-05, "loss": 0.7766858339309692, "step": 10219 }, { "epoch": 12.539877300613497, "grad_norm": 0.25049668550491333, "learning_rate": 1.6429061628781784e-05, "loss": 0.774864673614502, "step": 10220 }, { "epoch": 12.541104294478528, "grad_norm": 0.25361910462379456, "learning_rate": 1.6424346829146837e-05, "loss": 0.49386894702911377, "step": 10221 }, { "epoch": 12.542331288343558, "grad_norm": 0.29949522018432617, "learning_rate": 1.6419632375172e-05, "loss": 0.6553192734718323, "step": 10222 }, { "epoch": 12.54355828220859, "grad_norm": 0.25784817337989807, "learning_rate": 1.6414918267047323e-05, "loss": 0.7871114015579224, "step": 10223 }, { "epoch": 12.54478527607362, "grad_norm": 0.2517593502998352, "learning_rate": 1.6410204504962807e-05, "loss": 0.8042715787887573, "step": 10224 }, { "epoch": 12.54601226993865, "grad_norm": 0.31546691060066223, "learning_rate": 1.6405491089108445e-05, "loss": 0.6318793892860413, "step": 10225 }, { "epoch": 12.54723926380368, "grad_norm": 0.3079874515533447, "learning_rate": 1.640077801967422e-05, "loss": 0.6869462728500366, "step": 10226 }, { "epoch": 12.548466257668712, "grad_norm": 0.2696130871772766, "learning_rate": 1.6396065296850114e-05, "loss": 0.6697999238967896, "step": 10227 }, { "epoch": 12.549693251533743, "grad_norm": 0.29774096608161926, "learning_rate": 1.6391352920826074e-05, "loss": 0.6951802968978882, "step": 10228 }, { "epoch": 12.550920245398773, "grad_norm": 0.26564669609069824, "learning_rate": 1.638664089179205e-05, "loss": 0.48829948902130127, "step": 10229 }, { "epoch": 12.552147239263803, "grad_norm": 0.2945033311843872, "learning_rate": 1.6381929209937962e-05, "loss": 0.5651549100875854, "step": 10230 }, { "epoch": 12.553374233128835, "grad_norm": 0.25638073682785034, "learning_rate": 1.6377217875453728e-05, "loss": 0.5194183588027954, "step": 10231 }, { "epoch": 12.554601226993865, "grad_norm": 0.30395519733428955, "learning_rate": 1.637250688852925e-05, "loss": 0.6320400238037109, "step": 10232 }, { "epoch": 12.555828220858896, "grad_norm": 0.2702087163925171, "learning_rate": 1.636779624935441e-05, "loss": 0.38475871086120605, "step": 10233 }, { "epoch": 12.557055214723926, "grad_norm": 0.3467535078525543, "learning_rate": 1.6363085958119087e-05, "loss": 0.4971644878387451, "step": 10234 }, { "epoch": 12.558282208588958, "grad_norm": 0.23088937997817993, "learning_rate": 1.6358376015013134e-05, "loss": 0.4890100359916687, "step": 10235 }, { "epoch": 12.559509202453988, "grad_norm": 0.3281169831752777, "learning_rate": 1.6353666420226394e-05, "loss": 0.7367545366287231, "step": 10236 }, { "epoch": 12.560736196319018, "grad_norm": 0.29532837867736816, "learning_rate": 1.6348957173948703e-05, "loss": 0.49063631892204285, "step": 10237 }, { "epoch": 12.561963190184048, "grad_norm": 0.22956326603889465, "learning_rate": 1.6344248276369876e-05, "loss": 0.782902717590332, "step": 10238 }, { "epoch": 12.56319018404908, "grad_norm": 0.2337227165699005, "learning_rate": 1.6339539727679715e-05, "loss": 0.7083679437637329, "step": 10239 }, { "epoch": 12.56441717791411, "grad_norm": 0.24361997842788696, "learning_rate": 1.6334831528068003e-05, "loss": 0.7400814890861511, "step": 10240 }, { "epoch": 12.565644171779141, "grad_norm": 0.2654111683368683, "learning_rate": 1.633012367772452e-05, "loss": 0.6844011545181274, "step": 10241 }, { "epoch": 12.566871165644171, "grad_norm": 0.2607744336128235, "learning_rate": 1.6325416176839024e-05, "loss": 0.5506038069725037, "step": 10242 }, { "epoch": 12.568098159509203, "grad_norm": 0.24311810731887817, "learning_rate": 1.6320709025601265e-05, "loss": 0.5050079822540283, "step": 10243 }, { "epoch": 12.569325153374233, "grad_norm": 0.3712750971317291, "learning_rate": 1.631600222420097e-05, "loss": 0.707538366317749, "step": 10244 }, { "epoch": 12.570552147239264, "grad_norm": 0.30080172419548035, "learning_rate": 1.6311295772827844e-05, "loss": 0.3629786968231201, "step": 10245 }, { "epoch": 12.571779141104294, "grad_norm": 0.26874446868896484, "learning_rate": 1.630658967167162e-05, "loss": 0.7616629600524902, "step": 10246 }, { "epoch": 12.573006134969326, "grad_norm": 0.28386905789375305, "learning_rate": 1.6301883920921966e-05, "loss": 0.5156406164169312, "step": 10247 }, { "epoch": 12.574233128834356, "grad_norm": 0.2695285677909851, "learning_rate": 1.629717852076857e-05, "loss": 0.4784271717071533, "step": 10248 }, { "epoch": 12.575460122699386, "grad_norm": 0.30138206481933594, "learning_rate": 1.6292473471401075e-05, "loss": 0.5224239826202393, "step": 10249 }, { "epoch": 12.576687116564417, "grad_norm": 0.2805144190788269, "learning_rate": 1.6287768773009148e-05, "loss": 0.7450977563858032, "step": 10250 }, { "epoch": 12.577914110429449, "grad_norm": 0.31247222423553467, "learning_rate": 1.6283064425782417e-05, "loss": 0.4370138645172119, "step": 10251 }, { "epoch": 12.579141104294479, "grad_norm": 0.34006431698799133, "learning_rate": 1.627836042991049e-05, "loss": 0.5987051129341125, "step": 10252 }, { "epoch": 12.580368098159509, "grad_norm": 0.2987407147884369, "learning_rate": 1.6273656785582986e-05, "loss": 0.6232495903968811, "step": 10253 }, { "epoch": 12.58159509202454, "grad_norm": 0.25938552618026733, "learning_rate": 1.626895349298948e-05, "loss": 0.7257943153381348, "step": 10254 }, { "epoch": 12.582822085889571, "grad_norm": 0.24959109723567963, "learning_rate": 1.6264250552319562e-05, "loss": 0.7050656080245972, "step": 10255 }, { "epoch": 12.584049079754601, "grad_norm": 0.23452630639076233, "learning_rate": 1.6259547963762793e-05, "loss": 0.6340435743331909, "step": 10256 }, { "epoch": 12.585276073619632, "grad_norm": 0.2988729476928711, "learning_rate": 1.625484572750871e-05, "loss": 0.782375693321228, "step": 10257 }, { "epoch": 12.586503067484662, "grad_norm": 0.27389565110206604, "learning_rate": 1.6250143843746856e-05, "loss": 0.5642969608306885, "step": 10258 }, { "epoch": 12.587730061349694, "grad_norm": 0.2791449725627899, "learning_rate": 1.6245442312666753e-05, "loss": 0.7251659035682678, "step": 10259 }, { "epoch": 12.588957055214724, "grad_norm": 0.2600393295288086, "learning_rate": 1.6240741134457892e-05, "loss": 0.5487406253814697, "step": 10260 }, { "epoch": 12.590184049079754, "grad_norm": 0.2188514918088913, "learning_rate": 1.6236040309309786e-05, "loss": 0.6871216297149658, "step": 10261 }, { "epoch": 12.591411042944785, "grad_norm": 0.3399888873100281, "learning_rate": 1.623133983741189e-05, "loss": 0.4631440341472626, "step": 10262 }, { "epoch": 12.592638036809817, "grad_norm": 0.2639720141887665, "learning_rate": 1.622663971895368e-05, "loss": 0.5836132764816284, "step": 10263 }, { "epoch": 12.593865030674847, "grad_norm": 0.2467237412929535, "learning_rate": 1.6221939954124598e-05, "loss": 0.6206269264221191, "step": 10264 }, { "epoch": 12.595092024539877, "grad_norm": 0.29634979367256165, "learning_rate": 1.621724054311408e-05, "loss": 0.7204985618591309, "step": 10265 }, { "epoch": 12.596319018404907, "grad_norm": 0.25493040680885315, "learning_rate": 1.6212541486111554e-05, "loss": 0.6400631666183472, "step": 10266 }, { "epoch": 12.59754601226994, "grad_norm": 0.27182823419570923, "learning_rate": 1.6207842783306402e-05, "loss": 0.4619565010070801, "step": 10267 }, { "epoch": 12.59877300613497, "grad_norm": 0.2903321385383606, "learning_rate": 1.6203144434888045e-05, "loss": 0.6315851807594299, "step": 10268 }, { "epoch": 12.6, "grad_norm": 0.2542795240879059, "learning_rate": 1.619844644104584e-05, "loss": 0.6787106394767761, "step": 10269 }, { "epoch": 12.60122699386503, "grad_norm": 0.25688859820365906, "learning_rate": 1.6193748801969163e-05, "loss": 0.6970095038414001, "step": 10270 }, { "epoch": 12.602453987730062, "grad_norm": 0.29898539185523987, "learning_rate": 1.6189051517847355e-05, "loss": 0.8012993931770325, "step": 10271 }, { "epoch": 12.603680981595092, "grad_norm": 0.25625088810920715, "learning_rate": 1.6184354588869744e-05, "loss": 0.6529901027679443, "step": 10272 }, { "epoch": 12.604907975460122, "grad_norm": 0.24639873206615448, "learning_rate": 1.6179658015225663e-05, "loss": 0.5901649594306946, "step": 10273 }, { "epoch": 12.606134969325154, "grad_norm": 0.2669981122016907, "learning_rate": 1.6174961797104404e-05, "loss": 0.6819921731948853, "step": 10274 }, { "epoch": 12.607361963190185, "grad_norm": 0.2951301634311676, "learning_rate": 1.6170265934695275e-05, "loss": 0.7491562366485596, "step": 10275 }, { "epoch": 12.608588957055215, "grad_norm": 0.25426942110061646, "learning_rate": 1.6165570428187536e-05, "loss": 0.732937216758728, "step": 10276 }, { "epoch": 12.609815950920245, "grad_norm": 0.30270618200302124, "learning_rate": 1.6160875277770453e-05, "loss": 0.5575641989707947, "step": 10277 }, { "epoch": 12.611042944785275, "grad_norm": 0.25884389877319336, "learning_rate": 1.6156180483633287e-05, "loss": 0.5401222705841064, "step": 10278 }, { "epoch": 12.612269938650307, "grad_norm": 0.30155548453330994, "learning_rate": 1.6151486045965257e-05, "loss": 0.7422527074813843, "step": 10279 }, { "epoch": 12.613496932515337, "grad_norm": 0.30290618538856506, "learning_rate": 1.6146791964955595e-05, "loss": 0.8334518671035767, "step": 10280 }, { "epoch": 12.614723926380368, "grad_norm": 0.2812718152999878, "learning_rate": 1.614209824079349e-05, "loss": 0.6128717660903931, "step": 10281 }, { "epoch": 12.6159509202454, "grad_norm": 0.2613334357738495, "learning_rate": 1.6137404873668142e-05, "loss": 0.6709800958633423, "step": 10282 }, { "epoch": 12.61717791411043, "grad_norm": 0.3414847254753113, "learning_rate": 1.6132711863768728e-05, "loss": 0.5604519844055176, "step": 10283 }, { "epoch": 12.61840490797546, "grad_norm": 0.3062272071838379, "learning_rate": 1.612801921128441e-05, "loss": 0.5709648132324219, "step": 10284 }, { "epoch": 12.61963190184049, "grad_norm": 0.37959563732147217, "learning_rate": 1.6123326916404335e-05, "loss": 0.494035005569458, "step": 10285 }, { "epoch": 12.62085889570552, "grad_norm": 0.2397894561290741, "learning_rate": 1.611863497931763e-05, "loss": 0.6343711614608765, "step": 10286 }, { "epoch": 12.622085889570553, "grad_norm": 0.2606704831123352, "learning_rate": 1.611394340021342e-05, "loss": 0.6289659738540649, "step": 10287 }, { "epoch": 12.623312883435583, "grad_norm": 0.2392883598804474, "learning_rate": 1.6109252179280815e-05, "loss": 0.68366539478302, "step": 10288 }, { "epoch": 12.624539877300613, "grad_norm": 0.3922201097011566, "learning_rate": 1.6104561316708883e-05, "loss": 0.4419444799423218, "step": 10289 }, { "epoch": 12.625766871165645, "grad_norm": 0.27256765961647034, "learning_rate": 1.609987081268672e-05, "loss": 0.6292116045951843, "step": 10290 }, { "epoch": 12.626993865030675, "grad_norm": 0.30638036131858826, "learning_rate": 1.609518066740338e-05, "loss": 0.5126262903213501, "step": 10291 }, { "epoch": 12.628220858895705, "grad_norm": 0.2555093467235565, "learning_rate": 1.6090490881047917e-05, "loss": 0.6413840651512146, "step": 10292 }, { "epoch": 12.629447852760736, "grad_norm": 0.29405200481414795, "learning_rate": 1.608580145380935e-05, "loss": 0.6483467817306519, "step": 10293 }, { "epoch": 12.630674846625768, "grad_norm": 0.2447819709777832, "learning_rate": 1.6081112385876705e-05, "loss": 0.5367941856384277, "step": 10294 }, { "epoch": 12.631901840490798, "grad_norm": 0.29616042971611023, "learning_rate": 1.607642367743898e-05, "loss": 0.5997879505157471, "step": 10295 }, { "epoch": 12.633128834355828, "grad_norm": 0.26368942856788635, "learning_rate": 1.607173532868517e-05, "loss": 0.5148507356643677, "step": 10296 }, { "epoch": 12.634355828220858, "grad_norm": 0.2983103394508362, "learning_rate": 1.606704733980424e-05, "loss": 0.5985435247421265, "step": 10297 }, { "epoch": 12.63558282208589, "grad_norm": 0.2527945339679718, "learning_rate": 1.6062359710985156e-05, "loss": 0.7072142362594604, "step": 10298 }, { "epoch": 12.63680981595092, "grad_norm": 0.2935636341571808, "learning_rate": 1.6057672442416866e-05, "loss": 0.6571282148361206, "step": 10299 }, { "epoch": 12.63803680981595, "grad_norm": 0.23591217398643494, "learning_rate": 1.6052985534288287e-05, "loss": 0.6185701489448547, "step": 10300 }, { "epoch": 12.639263803680981, "grad_norm": 0.23002983629703522, "learning_rate": 1.6048298986788347e-05, "loss": 0.6549770832061768, "step": 10301 }, { "epoch": 12.640490797546013, "grad_norm": 0.21839424967765808, "learning_rate": 1.6043612800105946e-05, "loss": 0.5723908543586731, "step": 10302 }, { "epoch": 12.641717791411043, "grad_norm": 0.25069838762283325, "learning_rate": 1.6038926974429963e-05, "loss": 0.6416826844215393, "step": 10303 }, { "epoch": 12.642944785276073, "grad_norm": 0.20897053182125092, "learning_rate": 1.6034241509949276e-05, "loss": 0.42992299795150757, "step": 10304 }, { "epoch": 12.644171779141104, "grad_norm": 0.2300322949886322, "learning_rate": 1.6029556406852753e-05, "loss": 0.6536756753921509, "step": 10305 }, { "epoch": 12.645398773006136, "grad_norm": 0.2554857134819031, "learning_rate": 1.6024871665329215e-05, "loss": 0.6670893430709839, "step": 10306 }, { "epoch": 12.646625766871166, "grad_norm": 0.2974044382572174, "learning_rate": 1.6020187285567513e-05, "loss": 0.5316776633262634, "step": 10307 }, { "epoch": 12.647852760736196, "grad_norm": 0.2781546413898468, "learning_rate": 1.601550326775644e-05, "loss": 0.7992411255836487, "step": 10308 }, { "epoch": 12.649079754601226, "grad_norm": 0.28635647892951965, "learning_rate": 1.6010819612084806e-05, "loss": 0.3734503984451294, "step": 10309 }, { "epoch": 12.650306748466258, "grad_norm": 0.3126896321773529, "learning_rate": 1.6006136318741406e-05, "loss": 0.43734508752822876, "step": 10310 }, { "epoch": 12.651533742331289, "grad_norm": 0.225031778216362, "learning_rate": 1.600145338791498e-05, "loss": 0.8062106966972351, "step": 10311 }, { "epoch": 12.652760736196319, "grad_norm": 0.2697991728782654, "learning_rate": 1.599677081979432e-05, "loss": 0.5568410158157349, "step": 10312 }, { "epoch": 12.653987730061349, "grad_norm": 0.27503758668899536, "learning_rate": 1.5992088614568142e-05, "loss": 0.620410680770874, "step": 10313 }, { "epoch": 12.655214723926381, "grad_norm": 0.2632960379123688, "learning_rate": 1.598740677242519e-05, "loss": 0.4688619375228882, "step": 10314 }, { "epoch": 12.656441717791411, "grad_norm": 0.3050486743450165, "learning_rate": 1.5982725293554156e-05, "loss": 0.6380386352539062, "step": 10315 }, { "epoch": 12.657668711656441, "grad_norm": 0.27465754747390747, "learning_rate": 1.5978044178143757e-05, "loss": 0.7492796182632446, "step": 10316 }, { "epoch": 12.658895705521472, "grad_norm": 0.20728980004787445, "learning_rate": 1.597336342638266e-05, "loss": 0.5455673336982727, "step": 10317 }, { "epoch": 12.660122699386504, "grad_norm": 0.30310478806495667, "learning_rate": 1.596868303845954e-05, "loss": 0.8046605587005615, "step": 10318 }, { "epoch": 12.661349693251534, "grad_norm": 0.340225487947464, "learning_rate": 1.596400301456305e-05, "loss": 0.45592373609542847, "step": 10319 }, { "epoch": 12.662576687116564, "grad_norm": 0.2450176179409027, "learning_rate": 1.5959323354881825e-05, "loss": 0.7035637497901917, "step": 10320 }, { "epoch": 12.663803680981594, "grad_norm": 0.27689751982688904, "learning_rate": 1.5954644059604494e-05, "loss": 0.5467542409896851, "step": 10321 }, { "epoch": 12.665030674846626, "grad_norm": 0.2648445963859558, "learning_rate": 1.5949965128919657e-05, "loss": 0.6916453838348389, "step": 10322 }, { "epoch": 12.666257668711657, "grad_norm": 0.2625950872898102, "learning_rate": 1.5945286563015915e-05, "loss": 0.4561840891838074, "step": 10323 }, { "epoch": 12.667484662576687, "grad_norm": 0.3104582726955414, "learning_rate": 1.5940608362081855e-05, "loss": 0.5704874396324158, "step": 10324 }, { "epoch": 12.668711656441717, "grad_norm": 0.2624647319316864, "learning_rate": 1.5935930526306026e-05, "loss": 0.5995011925697327, "step": 10325 }, { "epoch": 12.669938650306749, "grad_norm": 0.24285493791103363, "learning_rate": 1.593125305587699e-05, "loss": 0.6295081377029419, "step": 10326 }, { "epoch": 12.67116564417178, "grad_norm": 0.26370102167129517, "learning_rate": 1.5926575950983276e-05, "loss": 0.6918890476226807, "step": 10327 }, { "epoch": 12.67239263803681, "grad_norm": 0.25490084290504456, "learning_rate": 1.5921899211813404e-05, "loss": 0.516855001449585, "step": 10328 }, { "epoch": 12.67361963190184, "grad_norm": 0.25907477736473083, "learning_rate": 1.591722283855589e-05, "loss": 0.6495572924613953, "step": 10329 }, { "epoch": 12.674846625766872, "grad_norm": 0.23929814994335175, "learning_rate": 1.5912546831399212e-05, "loss": 0.5393921732902527, "step": 10330 }, { "epoch": 12.676073619631902, "grad_norm": 0.27558788657188416, "learning_rate": 1.590787119053186e-05, "loss": 0.352449893951416, "step": 10331 }, { "epoch": 12.677300613496932, "grad_norm": 0.2638760805130005, "learning_rate": 1.5903195916142284e-05, "loss": 0.6589930057525635, "step": 10332 }, { "epoch": 12.678527607361962, "grad_norm": 0.32551589608192444, "learning_rate": 1.589852100841893e-05, "loss": 0.5395137667655945, "step": 10333 }, { "epoch": 12.679754601226994, "grad_norm": 0.24155013263225555, "learning_rate": 1.589384646755024e-05, "loss": 0.7145059704780579, "step": 10334 }, { "epoch": 12.680981595092025, "grad_norm": 0.24262699484825134, "learning_rate": 1.5889172293724633e-05, "loss": 0.49479418992996216, "step": 10335 }, { "epoch": 12.682208588957055, "grad_norm": 0.23476944863796234, "learning_rate": 1.58844984871305e-05, "loss": 0.6883655190467834, "step": 10336 }, { "epoch": 12.683435582822085, "grad_norm": 0.29631155729293823, "learning_rate": 1.5879825047956236e-05, "loss": 0.6894127130508423, "step": 10337 }, { "epoch": 12.684662576687117, "grad_norm": 0.26983073353767395, "learning_rate": 1.587515197639022e-05, "loss": 0.5833662748336792, "step": 10338 }, { "epoch": 12.685889570552147, "grad_norm": 0.444424033164978, "learning_rate": 1.5870479272620792e-05, "loss": 0.4332401156425476, "step": 10339 }, { "epoch": 12.687116564417177, "grad_norm": 0.3154613971710205, "learning_rate": 1.5865806936836315e-05, "loss": 0.6598443984985352, "step": 10340 }, { "epoch": 12.68834355828221, "grad_norm": 0.2529717981815338, "learning_rate": 1.5861134969225106e-05, "loss": 0.39893707633018494, "step": 10341 }, { "epoch": 12.68957055214724, "grad_norm": 0.3151600956916809, "learning_rate": 1.585646336997548e-05, "loss": 0.588534414768219, "step": 10342 }, { "epoch": 12.69079754601227, "grad_norm": 0.34452179074287415, "learning_rate": 1.5851792139275738e-05, "loss": 0.5439419150352478, "step": 10343 }, { "epoch": 12.6920245398773, "grad_norm": 0.25214624404907227, "learning_rate": 1.5847121277314166e-05, "loss": 0.7580349445343018, "step": 10344 }, { "epoch": 12.69325153374233, "grad_norm": 0.2750740349292755, "learning_rate": 1.5842450784279027e-05, "loss": 0.5331127643585205, "step": 10345 }, { "epoch": 12.694478527607362, "grad_norm": 0.291413813829422, "learning_rate": 1.583778066035858e-05, "loss": 0.6473420858383179, "step": 10346 }, { "epoch": 12.695705521472393, "grad_norm": 0.2757070064544678, "learning_rate": 1.583311090574106e-05, "loss": 0.6370989084243774, "step": 10347 }, { "epoch": 12.696932515337423, "grad_norm": 0.23132558166980743, "learning_rate": 1.58284415206147e-05, "loss": 0.5790682435035706, "step": 10348 }, { "epoch": 12.698159509202455, "grad_norm": 0.2901867926120758, "learning_rate": 1.58237725051677e-05, "loss": 0.8201746940612793, "step": 10349 }, { "epoch": 12.699386503067485, "grad_norm": 0.2677927613258362, "learning_rate": 1.5819103859588263e-05, "loss": 0.5937792658805847, "step": 10350 }, { "epoch": 12.700613496932515, "grad_norm": 0.2671263813972473, "learning_rate": 1.5814435584064563e-05, "loss": 0.47643357515335083, "step": 10351 }, { "epoch": 12.701840490797546, "grad_norm": 0.3305094540119171, "learning_rate": 1.5809767678784766e-05, "loss": 0.5276317596435547, "step": 10352 }, { "epoch": 12.703067484662578, "grad_norm": 0.24511884152889252, "learning_rate": 1.5805100143937022e-05, "loss": 0.8546044826507568, "step": 10353 }, { "epoch": 12.704294478527608, "grad_norm": 0.28289973735809326, "learning_rate": 1.5800432979709467e-05, "loss": 0.5303910374641418, "step": 10354 }, { "epoch": 12.705521472392638, "grad_norm": 0.26790961623191833, "learning_rate": 1.579576618629022e-05, "loss": 0.7111334800720215, "step": 10355 }, { "epoch": 12.706748466257668, "grad_norm": 0.2845519185066223, "learning_rate": 1.579109976386738e-05, "loss": 0.5928661823272705, "step": 10356 }, { "epoch": 12.7079754601227, "grad_norm": 0.35217222571372986, "learning_rate": 1.5786433712629055e-05, "loss": 0.3637310564517975, "step": 10357 }, { "epoch": 12.70920245398773, "grad_norm": 0.2758716344833374, "learning_rate": 1.57817680327633e-05, "loss": 0.3128761947154999, "step": 10358 }, { "epoch": 12.71042944785276, "grad_norm": 0.2818537950515747, "learning_rate": 1.577710272445819e-05, "loss": 0.6281648874282837, "step": 10359 }, { "epoch": 12.71165644171779, "grad_norm": 0.3247114419937134, "learning_rate": 1.5772437787901766e-05, "loss": 0.4462074041366577, "step": 10360 }, { "epoch": 12.712883435582823, "grad_norm": 0.326476514339447, "learning_rate": 1.5767773223282054e-05, "loss": 0.6376135945320129, "step": 10361 }, { "epoch": 12.714110429447853, "grad_norm": 0.3287084698677063, "learning_rate": 1.5763109030787076e-05, "loss": 0.5775983929634094, "step": 10362 }, { "epoch": 12.715337423312883, "grad_norm": 0.2804040014743805, "learning_rate": 1.5758445210604823e-05, "loss": 0.6481447219848633, "step": 10363 }, { "epoch": 12.716564417177914, "grad_norm": 0.27857816219329834, "learning_rate": 1.575378176292329e-05, "loss": 0.5064512491226196, "step": 10364 }, { "epoch": 12.717791411042946, "grad_norm": 0.26629751920700073, "learning_rate": 1.5749118687930443e-05, "loss": 0.6929815411567688, "step": 10365 }, { "epoch": 12.719018404907976, "grad_norm": 0.28042393922805786, "learning_rate": 1.5744455985814236e-05, "loss": 0.48454153537750244, "step": 10366 }, { "epoch": 12.720245398773006, "grad_norm": 0.3001095950603485, "learning_rate": 1.5739793656762614e-05, "loss": 0.6574556231498718, "step": 10367 }, { "epoch": 12.721472392638036, "grad_norm": 0.2563040852546692, "learning_rate": 1.5735131700963497e-05, "loss": 0.4545567035675049, "step": 10368 }, { "epoch": 12.722699386503068, "grad_norm": 0.2718988060951233, "learning_rate": 1.573047011860479e-05, "loss": 0.4895554780960083, "step": 10369 }, { "epoch": 12.723926380368098, "grad_norm": 0.28331923484802246, "learning_rate": 1.5725808909874403e-05, "loss": 0.7706087827682495, "step": 10370 }, { "epoch": 12.725153374233129, "grad_norm": 0.2370520830154419, "learning_rate": 1.5721148074960206e-05, "loss": 0.5549569129943848, "step": 10371 }, { "epoch": 12.726380368098159, "grad_norm": 0.29191598296165466, "learning_rate": 1.571648761405007e-05, "loss": 0.5792757868766785, "step": 10372 }, { "epoch": 12.72760736196319, "grad_norm": 0.2545836865901947, "learning_rate": 1.571182752733183e-05, "loss": 0.8501063585281372, "step": 10373 }, { "epoch": 12.728834355828221, "grad_norm": 0.3350871503353119, "learning_rate": 1.570716781499334e-05, "loss": 0.400352418422699, "step": 10374 }, { "epoch": 12.730061349693251, "grad_norm": 0.25647851824760437, "learning_rate": 1.570250847722241e-05, "loss": 0.7961468696594238, "step": 10375 }, { "epoch": 12.731288343558282, "grad_norm": 0.26465103030204773, "learning_rate": 1.5697849514206846e-05, "loss": 0.7734826803207397, "step": 10376 }, { "epoch": 12.732515337423314, "grad_norm": 0.27479347586631775, "learning_rate": 1.5693190926134437e-05, "loss": 0.8362640142440796, "step": 10377 }, { "epoch": 12.733742331288344, "grad_norm": 0.25783035159111023, "learning_rate": 1.568853271319295e-05, "loss": 0.6213236451148987, "step": 10378 }, { "epoch": 12.734969325153374, "grad_norm": 0.25961238145828247, "learning_rate": 1.568387487557016e-05, "loss": 0.5225649476051331, "step": 10379 }, { "epoch": 12.736196319018404, "grad_norm": 0.2396240532398224, "learning_rate": 1.5679217413453804e-05, "loss": 0.5065149664878845, "step": 10380 }, { "epoch": 12.737423312883436, "grad_norm": 0.6749546527862549, "learning_rate": 1.5674560327031613e-05, "loss": 0.6339707970619202, "step": 10381 }, { "epoch": 12.738650306748466, "grad_norm": 0.25540828704833984, "learning_rate": 1.5669903616491293e-05, "loss": 0.5689408183097839, "step": 10382 }, { "epoch": 12.739877300613497, "grad_norm": 0.26806220412254333, "learning_rate": 1.5665247282020547e-05, "loss": 0.6344542503356934, "step": 10383 }, { "epoch": 12.741104294478527, "grad_norm": 0.23321864008903503, "learning_rate": 1.5660591323807068e-05, "loss": 0.6475205421447754, "step": 10384 }, { "epoch": 12.742331288343559, "grad_norm": 0.2795684039592743, "learning_rate": 1.5655935742038507e-05, "loss": 0.7460982203483582, "step": 10385 }, { "epoch": 12.743558282208589, "grad_norm": 0.2612546980381012, "learning_rate": 1.5651280536902536e-05, "loss": 0.5380372405052185, "step": 10386 }, { "epoch": 12.74478527607362, "grad_norm": 0.2897357642650604, "learning_rate": 1.564662570858677e-05, "loss": 0.6089117527008057, "step": 10387 }, { "epoch": 12.74601226993865, "grad_norm": 0.25547119975090027, "learning_rate": 1.5641971257278852e-05, "loss": 0.45223522186279297, "step": 10388 }, { "epoch": 12.747239263803682, "grad_norm": 0.27540597319602966, "learning_rate": 1.5637317183166386e-05, "loss": 0.7269489765167236, "step": 10389 }, { "epoch": 12.748466257668712, "grad_norm": 0.2733166813850403, "learning_rate": 1.5632663486436953e-05, "loss": 0.6184879541397095, "step": 10390 }, { "epoch": 12.749693251533742, "grad_norm": 0.2327655553817749, "learning_rate": 1.562801016727815e-05, "loss": 0.6950438022613525, "step": 10391 }, { "epoch": 12.750920245398772, "grad_norm": 0.21494938433170319, "learning_rate": 1.5623357225877516e-05, "loss": 0.6382779479026794, "step": 10392 }, { "epoch": 12.752147239263804, "grad_norm": 0.3160698711872101, "learning_rate": 1.5618704662422612e-05, "loss": 0.6335594654083252, "step": 10393 }, { "epoch": 12.753374233128834, "grad_norm": 0.251498818397522, "learning_rate": 1.5614052477100973e-05, "loss": 0.558529257774353, "step": 10394 }, { "epoch": 12.754601226993865, "grad_norm": 0.24745972454547882, "learning_rate": 1.56094006701001e-05, "loss": 0.743963360786438, "step": 10395 }, { "epoch": 12.755828220858895, "grad_norm": 0.26855969429016113, "learning_rate": 1.5604749241607512e-05, "loss": 0.6139498949050903, "step": 10396 }, { "epoch": 12.757055214723927, "grad_norm": 0.234879270195961, "learning_rate": 1.560009819181068e-05, "loss": 0.6437205076217651, "step": 10397 }, { "epoch": 12.758282208588957, "grad_norm": 0.2353936731815338, "learning_rate": 1.5595447520897082e-05, "loss": 0.6001107692718506, "step": 10398 }, { "epoch": 12.759509202453987, "grad_norm": 0.2761699855327606, "learning_rate": 1.5590797229054174e-05, "loss": 0.47685956954956055, "step": 10399 }, { "epoch": 12.76073619631902, "grad_norm": 0.2604544460773468, "learning_rate": 1.5586147316469387e-05, "loss": 0.4571337401866913, "step": 10400 }, { "epoch": 12.76196319018405, "grad_norm": 0.24387316405773163, "learning_rate": 1.5581497783330156e-05, "loss": 0.6931902170181274, "step": 10401 }, { "epoch": 12.76319018404908, "grad_norm": 0.2490062117576599, "learning_rate": 1.5576848629823885e-05, "loss": 0.7073533535003662, "step": 10402 }, { "epoch": 12.76441717791411, "grad_norm": 0.3676087260246277, "learning_rate": 1.5572199856137976e-05, "loss": 0.3941441774368286, "step": 10403 }, { "epoch": 12.76564417177914, "grad_norm": 0.2777763307094574, "learning_rate": 1.5567551462459795e-05, "loss": 0.7753905057907104, "step": 10404 }, { "epoch": 12.766871165644172, "grad_norm": 0.31442946195602417, "learning_rate": 1.5562903448976723e-05, "loss": 0.608895480632782, "step": 10405 }, { "epoch": 12.768098159509202, "grad_norm": 0.3131190240383148, "learning_rate": 1.5558255815876087e-05, "loss": 0.5686376094818115, "step": 10406 }, { "epoch": 12.769325153374233, "grad_norm": 0.24409852921962738, "learning_rate": 1.5553608563345233e-05, "loss": 0.6001149415969849, "step": 10407 }, { "epoch": 12.770552147239265, "grad_norm": 0.2788815498352051, "learning_rate": 1.5548961691571478e-05, "loss": 0.7630085945129395, "step": 10408 }, { "epoch": 12.771779141104295, "grad_norm": 0.2283567488193512, "learning_rate": 1.554431520074212e-05, "loss": 0.5076738595962524, "step": 10409 }, { "epoch": 12.773006134969325, "grad_norm": 0.26784804463386536, "learning_rate": 1.5539669091044453e-05, "loss": 0.6619542837142944, "step": 10410 }, { "epoch": 12.774233128834355, "grad_norm": 0.22545278072357178, "learning_rate": 1.553502336266574e-05, "loss": 0.7019649744033813, "step": 10411 }, { "epoch": 12.775460122699386, "grad_norm": 0.2718367576599121, "learning_rate": 1.5530378015793237e-05, "loss": 0.6242594718933105, "step": 10412 }, { "epoch": 12.776687116564418, "grad_norm": 0.24904491007328033, "learning_rate": 1.5525733050614193e-05, "loss": 0.7969882488250732, "step": 10413 }, { "epoch": 12.777914110429448, "grad_norm": 0.32984107732772827, "learning_rate": 1.5521088467315826e-05, "loss": 0.41640251874923706, "step": 10414 }, { "epoch": 12.779141104294478, "grad_norm": 0.29478737711906433, "learning_rate": 1.5516444266085344e-05, "loss": 0.5962304472923279, "step": 10415 }, { "epoch": 12.78036809815951, "grad_norm": 0.27698343992233276, "learning_rate": 1.5511800447109952e-05, "loss": 0.8319816589355469, "step": 10416 }, { "epoch": 12.78159509202454, "grad_norm": 0.2758386731147766, "learning_rate": 1.550715701057682e-05, "loss": 0.7620525360107422, "step": 10417 }, { "epoch": 12.78282208588957, "grad_norm": 0.2681373953819275, "learning_rate": 1.550251395667312e-05, "loss": 0.6198902130126953, "step": 10418 }, { "epoch": 12.7840490797546, "grad_norm": 0.25673821568489075, "learning_rate": 1.5497871285585984e-05, "loss": 0.534300684928894, "step": 10419 }, { "epoch": 12.785276073619633, "grad_norm": 0.3063642978668213, "learning_rate": 1.5493228997502563e-05, "loss": 0.39275532960891724, "step": 10420 }, { "epoch": 12.786503067484663, "grad_norm": 0.23770248889923096, "learning_rate": 1.548858709260997e-05, "loss": 0.7585238218307495, "step": 10421 }, { "epoch": 12.787730061349693, "grad_norm": 0.23090393841266632, "learning_rate": 1.5483945571095287e-05, "loss": 0.6878893375396729, "step": 10422 }, { "epoch": 12.788957055214723, "grad_norm": 0.26793307065963745, "learning_rate": 1.547930443314563e-05, "loss": 0.5489615201950073, "step": 10423 }, { "epoch": 12.790184049079755, "grad_norm": 0.2663865089416504, "learning_rate": 1.5474663678948053e-05, "loss": 0.8351732492446899, "step": 10424 }, { "epoch": 12.791411042944786, "grad_norm": 0.31993725895881653, "learning_rate": 1.5470023308689623e-05, "loss": 0.6572630405426025, "step": 10425 }, { "epoch": 12.792638036809816, "grad_norm": 0.2884480953216553, "learning_rate": 1.546538332255737e-05, "loss": 0.5202093124389648, "step": 10426 }, { "epoch": 12.793865030674846, "grad_norm": 0.2937318682670593, "learning_rate": 1.5460743720738326e-05, "loss": 0.5590159893035889, "step": 10427 }, { "epoch": 12.795092024539878, "grad_norm": 0.2621413469314575, "learning_rate": 1.5456104503419487e-05, "loss": 0.8195525407791138, "step": 10428 }, { "epoch": 12.796319018404908, "grad_norm": 0.24017809331417084, "learning_rate": 1.545146567078786e-05, "loss": 0.6738446354866028, "step": 10429 }, { "epoch": 12.797546012269938, "grad_norm": 0.30261409282684326, "learning_rate": 1.5446827223030422e-05, "loss": 0.5746402740478516, "step": 10430 }, { "epoch": 12.798773006134969, "grad_norm": 0.2222578525543213, "learning_rate": 1.5442189160334124e-05, "loss": 0.6628122329711914, "step": 10431 }, { "epoch": 12.8, "grad_norm": 0.28833872079849243, "learning_rate": 1.543755148288593e-05, "loss": 0.6688283681869507, "step": 10432 }, { "epoch": 12.801226993865031, "grad_norm": 0.32325759530067444, "learning_rate": 1.5432914190872757e-05, "loss": 0.5365858674049377, "step": 10433 }, { "epoch": 12.802453987730061, "grad_norm": 0.27405381202697754, "learning_rate": 1.542827728448153e-05, "loss": 0.7178012728691101, "step": 10434 }, { "epoch": 12.803680981595091, "grad_norm": 0.31782054901123047, "learning_rate": 1.5423640763899146e-05, "loss": 0.6342248916625977, "step": 10435 }, { "epoch": 12.804907975460123, "grad_norm": 0.24828211963176727, "learning_rate": 1.5419004629312486e-05, "loss": 0.8129331469535828, "step": 10436 }, { "epoch": 12.806134969325154, "grad_norm": 0.27022284269332886, "learning_rate": 1.541436888090843e-05, "loss": 0.4954041838645935, "step": 10437 }, { "epoch": 12.807361963190184, "grad_norm": 0.24797698855400085, "learning_rate": 1.540973351887382e-05, "loss": 0.8097396492958069, "step": 10438 }, { "epoch": 12.808588957055214, "grad_norm": 0.31206953525543213, "learning_rate": 1.54050985433955e-05, "loss": 0.8228416442871094, "step": 10439 }, { "epoch": 12.809815950920246, "grad_norm": 0.3467012047767639, "learning_rate": 1.5400463954660296e-05, "loss": 0.6884624361991882, "step": 10440 }, { "epoch": 12.811042944785276, "grad_norm": 0.3364018499851227, "learning_rate": 1.5395829752855008e-05, "loss": 0.5416916608810425, "step": 10441 }, { "epoch": 12.812269938650306, "grad_norm": 0.23497967422008514, "learning_rate": 1.5391195938166434e-05, "loss": 0.733086347579956, "step": 10442 }, { "epoch": 12.813496932515337, "grad_norm": 0.3302067816257477, "learning_rate": 1.538656251078134e-05, "loss": 0.6505687236785889, "step": 10443 }, { "epoch": 12.814723926380369, "grad_norm": 0.2614805996417999, "learning_rate": 1.53819294708865e-05, "loss": 0.6937987208366394, "step": 10444 }, { "epoch": 12.815950920245399, "grad_norm": 0.27013254165649414, "learning_rate": 1.5377296818668638e-05, "loss": 0.8286452293395996, "step": 10445 }, { "epoch": 12.81717791411043, "grad_norm": 0.27876555919647217, "learning_rate": 1.537266455431451e-05, "loss": 0.4086189866065979, "step": 10446 }, { "epoch": 12.81840490797546, "grad_norm": 0.2629670798778534, "learning_rate": 1.536803267801081e-05, "loss": 0.6496949791908264, "step": 10447 }, { "epoch": 12.819631901840491, "grad_norm": 0.25026658177375793, "learning_rate": 1.5363401189944244e-05, "loss": 0.4640228748321533, "step": 10448 }, { "epoch": 12.820858895705522, "grad_norm": 0.3048974573612213, "learning_rate": 1.5358770090301495e-05, "loss": 0.6417364478111267, "step": 10449 }, { "epoch": 12.822085889570552, "grad_norm": 0.22488375008106232, "learning_rate": 1.5354139379269217e-05, "loss": 0.7165993452072144, "step": 10450 }, { "epoch": 12.823312883435582, "grad_norm": 0.24752946197986603, "learning_rate": 1.534950905703408e-05, "loss": 0.5884678959846497, "step": 10451 }, { "epoch": 12.824539877300614, "grad_norm": 0.2597396969795227, "learning_rate": 1.5344879123782703e-05, "loss": 0.6448565721511841, "step": 10452 }, { "epoch": 12.825766871165644, "grad_norm": 0.2733462154865265, "learning_rate": 1.534024957970171e-05, "loss": 0.5341026186943054, "step": 10453 }, { "epoch": 12.826993865030675, "grad_norm": 0.2913404703140259, "learning_rate": 1.533562042497771e-05, "loss": 0.7447823286056519, "step": 10454 }, { "epoch": 12.828220858895705, "grad_norm": 0.3671053946018219, "learning_rate": 1.5330991659797285e-05, "loss": 0.516997218132019, "step": 10455 }, { "epoch": 12.829447852760737, "grad_norm": 0.24274732172489166, "learning_rate": 1.532636328434701e-05, "loss": 0.4899592995643616, "step": 10456 }, { "epoch": 12.830674846625767, "grad_norm": 0.2752110958099365, "learning_rate": 1.5321735298813442e-05, "loss": 0.5515711903572083, "step": 10457 }, { "epoch": 12.831901840490797, "grad_norm": 0.3303106427192688, "learning_rate": 1.5317107703383117e-05, "loss": 0.6219760179519653, "step": 10458 }, { "epoch": 12.833128834355827, "grad_norm": 0.3086729049682617, "learning_rate": 1.5312480498242572e-05, "loss": 0.563400149345398, "step": 10459 }, { "epoch": 12.83435582822086, "grad_norm": 0.2399756759405136, "learning_rate": 1.53078536835783e-05, "loss": 0.5716342926025391, "step": 10460 }, { "epoch": 12.83558282208589, "grad_norm": 0.3030508756637573, "learning_rate": 1.53032272595768e-05, "loss": 0.4875631332397461, "step": 10461 }, { "epoch": 12.83680981595092, "grad_norm": 0.2653288245201111, "learning_rate": 1.5298601226424567e-05, "loss": 0.6066681742668152, "step": 10462 }, { "epoch": 12.83803680981595, "grad_norm": 0.33706140518188477, "learning_rate": 1.529397558430804e-05, "loss": 0.5616824626922607, "step": 10463 }, { "epoch": 12.839263803680982, "grad_norm": 0.26865869760513306, "learning_rate": 1.5289350333413675e-05, "loss": 0.7088884115219116, "step": 10464 }, { "epoch": 12.840490797546012, "grad_norm": 0.25229108333587646, "learning_rate": 1.5284725473927905e-05, "loss": 0.41676804423332214, "step": 10465 }, { "epoch": 12.841717791411043, "grad_norm": 0.2851940989494324, "learning_rate": 1.5280101006037138e-05, "loss": 0.7034831047058105, "step": 10466 }, { "epoch": 12.842944785276075, "grad_norm": 0.23331227898597717, "learning_rate": 1.5275476929927773e-05, "loss": 0.7417961359024048, "step": 10467 }, { "epoch": 12.844171779141105, "grad_norm": 0.3086407482624054, "learning_rate": 1.527085324578621e-05, "loss": 0.6436289548873901, "step": 10468 }, { "epoch": 12.845398773006135, "grad_norm": 0.2890776991844177, "learning_rate": 1.52662299537988e-05, "loss": 0.6069462299346924, "step": 10469 }, { "epoch": 12.846625766871165, "grad_norm": 0.2905136048793793, "learning_rate": 1.5261607054151893e-05, "loss": 0.5059443712234497, "step": 10470 }, { "epoch": 12.847852760736195, "grad_norm": 0.2468654215335846, "learning_rate": 1.525698454703184e-05, "loss": 0.41441577672958374, "step": 10471 }, { "epoch": 12.849079754601227, "grad_norm": 0.29091763496398926, "learning_rate": 1.5252362432624947e-05, "loss": 0.8867733478546143, "step": 10472 }, { "epoch": 12.850306748466258, "grad_norm": 0.2529218792915344, "learning_rate": 1.5247740711117528e-05, "loss": 0.5818251371383667, "step": 10473 }, { "epoch": 12.851533742331288, "grad_norm": 0.2894029915332794, "learning_rate": 1.524311938269586e-05, "loss": 0.6457875967025757, "step": 10474 }, { "epoch": 12.85276073619632, "grad_norm": 0.27615734934806824, "learning_rate": 1.5238498447546223e-05, "loss": 0.48824048042297363, "step": 10475 }, { "epoch": 12.85398773006135, "grad_norm": 0.34441936016082764, "learning_rate": 1.523387790585488e-05, "loss": 0.780368983745575, "step": 10476 }, { "epoch": 12.85521472392638, "grad_norm": 0.31822824478149414, "learning_rate": 1.5229257757808058e-05, "loss": 0.5007065534591675, "step": 10477 }, { "epoch": 12.85644171779141, "grad_norm": 0.3029169738292694, "learning_rate": 1.5224638003591996e-05, "loss": 0.41089510917663574, "step": 10478 }, { "epoch": 12.857668711656443, "grad_norm": 0.2820438742637634, "learning_rate": 1.5220018643392885e-05, "loss": 0.5192344188690186, "step": 10479 }, { "epoch": 12.858895705521473, "grad_norm": 0.2460481822490692, "learning_rate": 1.5215399677396936e-05, "loss": 0.7477299571037292, "step": 10480 }, { "epoch": 12.860122699386503, "grad_norm": 0.27175047993659973, "learning_rate": 1.521078110579032e-05, "loss": 0.5336948037147522, "step": 10481 }, { "epoch": 12.861349693251533, "grad_norm": 0.3126186430454254, "learning_rate": 1.5206162928759193e-05, "loss": 0.5704324245452881, "step": 10482 }, { "epoch": 12.862576687116565, "grad_norm": 0.28790608048439026, "learning_rate": 1.5201545146489713e-05, "loss": 0.6534237861633301, "step": 10483 }, { "epoch": 12.863803680981595, "grad_norm": 0.29802146553993225, "learning_rate": 1.5196927759167994e-05, "loss": 0.6233423948287964, "step": 10484 }, { "epoch": 12.865030674846626, "grad_norm": 0.2740320861339569, "learning_rate": 1.519231076698016e-05, "loss": 0.7518777251243591, "step": 10485 }, { "epoch": 12.866257668711656, "grad_norm": 0.22964628040790558, "learning_rate": 1.5187694170112309e-05, "loss": 0.6618506908416748, "step": 10486 }, { "epoch": 12.867484662576688, "grad_norm": 0.3079987168312073, "learning_rate": 1.518307796875052e-05, "loss": 0.45574820041656494, "step": 10487 }, { "epoch": 12.868711656441718, "grad_norm": 0.25345146656036377, "learning_rate": 1.517846216308086e-05, "loss": 0.5925365090370178, "step": 10488 }, { "epoch": 12.869938650306748, "grad_norm": 0.265171080827713, "learning_rate": 1.5173846753289368e-05, "loss": 0.553442120552063, "step": 10489 }, { "epoch": 12.871165644171779, "grad_norm": 0.24313423037528992, "learning_rate": 1.5169231739562102e-05, "loss": 0.5137978792190552, "step": 10490 }, { "epoch": 12.87239263803681, "grad_norm": 0.2759819030761719, "learning_rate": 1.5164617122085056e-05, "loss": 0.5973513126373291, "step": 10491 }, { "epoch": 12.87361963190184, "grad_norm": 0.2893002927303314, "learning_rate": 1.5160002901044251e-05, "loss": 0.6704291105270386, "step": 10492 }, { "epoch": 12.874846625766871, "grad_norm": 0.28184542059898376, "learning_rate": 1.5155389076625662e-05, "loss": 0.6112995743751526, "step": 10493 }, { "epoch": 12.876073619631901, "grad_norm": 0.3162164092063904, "learning_rate": 1.515077564901526e-05, "loss": 0.513408899307251, "step": 10494 }, { "epoch": 12.877300613496933, "grad_norm": 0.2872011959552765, "learning_rate": 1.5146162618399004e-05, "loss": 0.7037478685379028, "step": 10495 }, { "epoch": 12.878527607361963, "grad_norm": 0.3097578287124634, "learning_rate": 1.5141549984962825e-05, "loss": 0.7831158638000488, "step": 10496 }, { "epoch": 12.879754601226994, "grad_norm": 0.27349919080734253, "learning_rate": 1.5136937748892654e-05, "loss": 0.7003238201141357, "step": 10497 }, { "epoch": 12.880981595092024, "grad_norm": 0.26856496930122375, "learning_rate": 1.5132325910374387e-05, "loss": 0.3808272182941437, "step": 10498 }, { "epoch": 12.882208588957056, "grad_norm": 0.2765170931816101, "learning_rate": 1.5127714469593918e-05, "loss": 0.5144084692001343, "step": 10499 }, { "epoch": 12.883435582822086, "grad_norm": 0.23896293342113495, "learning_rate": 1.512310342673713e-05, "loss": 0.5083152055740356, "step": 10500 }, { "epoch": 12.884662576687116, "grad_norm": 0.28449127078056335, "learning_rate": 1.5118492781989862e-05, "loss": 0.34063664078712463, "step": 10501 }, { "epoch": 12.885889570552147, "grad_norm": 0.28321802616119385, "learning_rate": 1.5113882535537977e-05, "loss": 0.9568904042243958, "step": 10502 }, { "epoch": 12.887116564417179, "grad_norm": 0.20974145829677582, "learning_rate": 1.5109272687567285e-05, "loss": 0.5715916156768799, "step": 10503 }, { "epoch": 12.888343558282209, "grad_norm": 0.2599649429321289, "learning_rate": 1.5104663238263602e-05, "loss": 0.5490410327911377, "step": 10504 }, { "epoch": 12.889570552147239, "grad_norm": 0.2521940767765045, "learning_rate": 1.510005418781273e-05, "loss": 0.5733864307403564, "step": 10505 }, { "epoch": 12.89079754601227, "grad_norm": 0.2598781883716583, "learning_rate": 1.5095445536400427e-05, "loss": 0.6471817493438721, "step": 10506 }, { "epoch": 12.892024539877301, "grad_norm": 0.24759049713611603, "learning_rate": 1.5090837284212472e-05, "loss": 0.8315441012382507, "step": 10507 }, { "epoch": 12.893251533742331, "grad_norm": 0.2719755470752716, "learning_rate": 1.5086229431434601e-05, "loss": 0.33126118779182434, "step": 10508 }, { "epoch": 12.894478527607362, "grad_norm": 0.26110172271728516, "learning_rate": 1.5081621978252548e-05, "loss": 0.5783833265304565, "step": 10509 }, { "epoch": 12.895705521472392, "grad_norm": 0.2881051301956177, "learning_rate": 1.5077014924852026e-05, "loss": 0.6389544606208801, "step": 10510 }, { "epoch": 12.896932515337424, "grad_norm": 0.27925342321395874, "learning_rate": 1.5072408271418723e-05, "loss": 0.6171417236328125, "step": 10511 }, { "epoch": 12.898159509202454, "grad_norm": 0.24947476387023926, "learning_rate": 1.5067802018138333e-05, "loss": 0.596111536026001, "step": 10512 }, { "epoch": 12.899386503067484, "grad_norm": 0.30759328603744507, "learning_rate": 1.5063196165196516e-05, "loss": 0.685662031173706, "step": 10513 }, { "epoch": 12.900613496932515, "grad_norm": 0.23570168018341064, "learning_rate": 1.5058590712778928e-05, "loss": 0.6013649702072144, "step": 10514 }, { "epoch": 12.901840490797547, "grad_norm": 0.2645385265350342, "learning_rate": 1.5053985661071188e-05, "loss": 0.7225919961929321, "step": 10515 }, { "epoch": 12.903067484662577, "grad_norm": 0.3188537657260895, "learning_rate": 1.5049381010258917e-05, "loss": 0.6261975765228271, "step": 10516 }, { "epoch": 12.904294478527607, "grad_norm": 0.2220637947320938, "learning_rate": 1.5044776760527727e-05, "loss": 0.5058014988899231, "step": 10517 }, { "epoch": 12.905521472392637, "grad_norm": 0.2858502268791199, "learning_rate": 1.5040172912063182e-05, "loss": 0.3496761918067932, "step": 10518 }, { "epoch": 12.90674846625767, "grad_norm": 0.2597216069698334, "learning_rate": 1.5035569465050869e-05, "loss": 0.5822521448135376, "step": 10519 }, { "epoch": 12.9079754601227, "grad_norm": 0.255443274974823, "learning_rate": 1.5030966419676329e-05, "loss": 0.6754099726676941, "step": 10520 }, { "epoch": 12.90920245398773, "grad_norm": 0.27404695749282837, "learning_rate": 1.5026363776125096e-05, "loss": 0.7025632858276367, "step": 10521 }, { "epoch": 12.91042944785276, "grad_norm": 0.21718081831932068, "learning_rate": 1.50217615345827e-05, "loss": 0.6247342228889465, "step": 10522 }, { "epoch": 12.911656441717792, "grad_norm": 0.26673921942710876, "learning_rate": 1.5017159695234632e-05, "loss": 0.7015208005905151, "step": 10523 }, { "epoch": 12.912883435582822, "grad_norm": 0.2824779152870178, "learning_rate": 1.5012558258266391e-05, "loss": 0.5047683715820312, "step": 10524 }, { "epoch": 12.914110429447852, "grad_norm": 0.25627899169921875, "learning_rate": 1.5007957223863434e-05, "loss": 0.7287921905517578, "step": 10525 }, { "epoch": 12.915337423312884, "grad_norm": 0.29855453968048096, "learning_rate": 1.5003356592211226e-05, "loss": 0.5703563094139099, "step": 10526 }, { "epoch": 12.916564417177915, "grad_norm": 0.24849307537078857, "learning_rate": 1.4998756363495203e-05, "loss": 0.7197933197021484, "step": 10527 }, { "epoch": 12.917791411042945, "grad_norm": 0.3490165174007416, "learning_rate": 1.4994156537900788e-05, "loss": 0.5488051176071167, "step": 10528 }, { "epoch": 12.919018404907975, "grad_norm": 0.2408599555492401, "learning_rate": 1.4989557115613387e-05, "loss": 0.6648540496826172, "step": 10529 }, { "epoch": 12.920245398773005, "grad_norm": 0.26215940713882446, "learning_rate": 1.4984958096818383e-05, "loss": 0.5768876671791077, "step": 10530 }, { "epoch": 12.921472392638037, "grad_norm": 0.23348596692085266, "learning_rate": 1.4980359481701153e-05, "loss": 0.8051613569259644, "step": 10531 }, { "epoch": 12.922699386503067, "grad_norm": 0.3019452691078186, "learning_rate": 1.4975761270447064e-05, "loss": 0.36945289373397827, "step": 10532 }, { "epoch": 12.923926380368098, "grad_norm": 0.2913174331188202, "learning_rate": 1.4971163463241438e-05, "loss": 0.5593756437301636, "step": 10533 }, { "epoch": 12.92515337423313, "grad_norm": 0.3278563916683197, "learning_rate": 1.496656606026961e-05, "loss": 0.26122772693634033, "step": 10534 }, { "epoch": 12.92638036809816, "grad_norm": 0.21792194247245789, "learning_rate": 1.4961969061716891e-05, "loss": 0.5427676439285278, "step": 10535 }, { "epoch": 12.92760736196319, "grad_norm": 0.305499792098999, "learning_rate": 1.4957372467768572e-05, "loss": 0.5816251635551453, "step": 10536 }, { "epoch": 12.92883435582822, "grad_norm": 0.27253520488739014, "learning_rate": 1.4952776278609925e-05, "loss": 0.6889444589614868, "step": 10537 }, { "epoch": 12.93006134969325, "grad_norm": 0.2932608723640442, "learning_rate": 1.4948180494426212e-05, "loss": 0.8436762094497681, "step": 10538 }, { "epoch": 12.931288343558283, "grad_norm": 0.31186622381210327, "learning_rate": 1.4943585115402675e-05, "loss": 0.6306883692741394, "step": 10539 }, { "epoch": 12.932515337423313, "grad_norm": 0.32628294825553894, "learning_rate": 1.4938990141724534e-05, "loss": 0.5124668478965759, "step": 10540 }, { "epoch": 12.933742331288343, "grad_norm": 0.31520625948905945, "learning_rate": 1.4934395573577015e-05, "loss": 0.46462777256965637, "step": 10541 }, { "epoch": 12.934969325153375, "grad_norm": 0.2738792300224304, "learning_rate": 1.4929801411145295e-05, "loss": 0.6118382215499878, "step": 10542 }, { "epoch": 12.936196319018405, "grad_norm": 0.24291352927684784, "learning_rate": 1.4925207654614565e-05, "loss": 0.7641913890838623, "step": 10543 }, { "epoch": 12.937423312883435, "grad_norm": 0.24801777303218842, "learning_rate": 1.4920614304169978e-05, "loss": 0.5689210295677185, "step": 10544 }, { "epoch": 12.938650306748466, "grad_norm": 0.20295515656471252, "learning_rate": 1.4916021359996677e-05, "loss": 0.5740965604782104, "step": 10545 }, { "epoch": 12.939877300613498, "grad_norm": 0.2992590367794037, "learning_rate": 1.49114288222798e-05, "loss": 0.7735904455184937, "step": 10546 }, { "epoch": 12.941104294478528, "grad_norm": 0.25624728202819824, "learning_rate": 1.4906836691204451e-05, "loss": 0.562638521194458, "step": 10547 }, { "epoch": 12.942331288343558, "grad_norm": 0.25428274273872375, "learning_rate": 1.4902244966955731e-05, "loss": 0.8557918667793274, "step": 10548 }, { "epoch": 12.943558282208588, "grad_norm": 0.25685468316078186, "learning_rate": 1.4897653649718712e-05, "loss": 0.7565156817436218, "step": 10549 }, { "epoch": 12.94478527607362, "grad_norm": 0.3513050079345703, "learning_rate": 1.4893062739678463e-05, "loss": 0.6869139075279236, "step": 10550 }, { "epoch": 12.94601226993865, "grad_norm": 0.3041188418865204, "learning_rate": 1.4888472237020034e-05, "loss": 0.6250312924385071, "step": 10551 }, { "epoch": 12.94723926380368, "grad_norm": 0.30140167474746704, "learning_rate": 1.4883882141928443e-05, "loss": 0.7317388653755188, "step": 10552 }, { "epoch": 12.948466257668711, "grad_norm": 0.2534619867801666, "learning_rate": 1.4879292454588717e-05, "loss": 0.6699814200401306, "step": 10553 }, { "epoch": 12.949693251533743, "grad_norm": 0.2742829918861389, "learning_rate": 1.4874703175185841e-05, "loss": 0.6024912595748901, "step": 10554 }, { "epoch": 12.950920245398773, "grad_norm": 0.3144311010837555, "learning_rate": 1.4870114303904803e-05, "loss": 0.7488552331924438, "step": 10555 }, { "epoch": 12.952147239263804, "grad_norm": 0.2621362507343292, "learning_rate": 1.4865525840930561e-05, "loss": 0.6232331395149231, "step": 10556 }, { "epoch": 12.953374233128834, "grad_norm": 0.2365599423646927, "learning_rate": 1.4860937786448079e-05, "loss": 0.6981648206710815, "step": 10557 }, { "epoch": 12.954601226993866, "grad_norm": 1.3747094869613647, "learning_rate": 1.4856350140642273e-05, "loss": 0.6373331546783447, "step": 10558 }, { "epoch": 12.955828220858896, "grad_norm": 0.2530227303504944, "learning_rate": 1.4851762903698057e-05, "loss": 0.5756849050521851, "step": 10559 }, { "epoch": 12.957055214723926, "grad_norm": 0.3894573748111725, "learning_rate": 1.4847176075800345e-05, "loss": 0.6638885736465454, "step": 10560 }, { "epoch": 12.958282208588956, "grad_norm": 0.2766924798488617, "learning_rate": 1.4842589657134e-05, "loss": 0.560258150100708, "step": 10561 }, { "epoch": 12.959509202453988, "grad_norm": 0.296516478061676, "learning_rate": 1.4838003647883905e-05, "loss": 0.5162907838821411, "step": 10562 }, { "epoch": 12.960736196319019, "grad_norm": 0.25698065757751465, "learning_rate": 1.4833418048234893e-05, "loss": 0.7378814220428467, "step": 10563 }, { "epoch": 12.961963190184049, "grad_norm": 0.32302767038345337, "learning_rate": 1.4828832858371801e-05, "loss": 0.7278720736503601, "step": 10564 }, { "epoch": 12.963190184049079, "grad_norm": 0.2615664005279541, "learning_rate": 1.4824248078479458e-05, "loss": 0.7477800846099854, "step": 10565 }, { "epoch": 12.964417177914111, "grad_norm": 0.3315808176994324, "learning_rate": 1.4819663708742642e-05, "loss": 0.4082464277744293, "step": 10566 }, { "epoch": 12.965644171779141, "grad_norm": 0.2552633285522461, "learning_rate": 1.4815079749346151e-05, "loss": 0.638657808303833, "step": 10567 }, { "epoch": 12.966871165644172, "grad_norm": 0.2816590368747711, "learning_rate": 1.4810496200474749e-05, "loss": 0.6835187673568726, "step": 10568 }, { "epoch": 12.968098159509202, "grad_norm": 0.35653433203697205, "learning_rate": 1.4805913062313182e-05, "loss": 0.4445486068725586, "step": 10569 }, { "epoch": 12.969325153374234, "grad_norm": 0.3514978289604187, "learning_rate": 1.4801330335046187e-05, "loss": 0.5451598763465881, "step": 10570 }, { "epoch": 12.970552147239264, "grad_norm": 0.2566346228122711, "learning_rate": 1.4796748018858475e-05, "loss": 0.6214083433151245, "step": 10571 }, { "epoch": 12.971779141104294, "grad_norm": 0.2796567976474762, "learning_rate": 1.479216611393475e-05, "loss": 0.6726173162460327, "step": 10572 }, { "epoch": 12.973006134969324, "grad_norm": 0.23174817860126495, "learning_rate": 1.47875846204597e-05, "loss": 0.7148479223251343, "step": 10573 }, { "epoch": 12.974233128834356, "grad_norm": 0.32025933265686035, "learning_rate": 1.4783003538617982e-05, "loss": 0.5969526171684265, "step": 10574 }, { "epoch": 12.975460122699387, "grad_norm": 0.31956565380096436, "learning_rate": 1.4778422868594255e-05, "loss": 0.6659612059593201, "step": 10575 }, { "epoch": 12.976687116564417, "grad_norm": 0.3609808087348938, "learning_rate": 1.4773842610573147e-05, "loss": 0.4526892602443695, "step": 10576 }, { "epoch": 12.977914110429447, "grad_norm": 0.29470735788345337, "learning_rate": 1.4769262764739277e-05, "loss": 0.6728960871696472, "step": 10577 }, { "epoch": 12.979141104294479, "grad_norm": 0.21720488369464874, "learning_rate": 1.476468333127724e-05, "loss": 0.40114516019821167, "step": 10578 }, { "epoch": 12.98036809815951, "grad_norm": 0.2661912441253662, "learning_rate": 1.4760104310371636e-05, "loss": 0.8597099184989929, "step": 10579 }, { "epoch": 12.98159509202454, "grad_norm": 0.2584551274776459, "learning_rate": 1.4755525702207018e-05, "loss": 0.40423858165740967, "step": 10580 }, { "epoch": 12.98282208588957, "grad_norm": 0.27901068329811096, "learning_rate": 1.4750947506967938e-05, "loss": 0.8500063419342041, "step": 10581 }, { "epoch": 12.984049079754602, "grad_norm": 0.26074230670928955, "learning_rate": 1.4746369724838941e-05, "loss": 0.6670469045639038, "step": 10582 }, { "epoch": 12.985276073619632, "grad_norm": 0.2518526315689087, "learning_rate": 1.474179235600453e-05, "loss": 0.6543309688568115, "step": 10583 }, { "epoch": 12.986503067484662, "grad_norm": 0.2566350996494293, "learning_rate": 1.4737215400649218e-05, "loss": 0.7289572954177856, "step": 10584 }, { "epoch": 12.987730061349692, "grad_norm": 0.25245630741119385, "learning_rate": 1.4732638858957476e-05, "loss": 0.7685628533363342, "step": 10585 }, { "epoch": 12.988957055214724, "grad_norm": 0.23528197407722473, "learning_rate": 1.472806273111378e-05, "loss": 0.7940806150436401, "step": 10586 }, { "epoch": 12.990184049079755, "grad_norm": 0.31222036480903625, "learning_rate": 1.4723487017302581e-05, "loss": 0.48324471712112427, "step": 10587 }, { "epoch": 12.991411042944785, "grad_norm": 0.23354826867580414, "learning_rate": 1.4718911717708307e-05, "loss": 0.5741459131240845, "step": 10588 }, { "epoch": 12.992638036809815, "grad_norm": 0.24575698375701904, "learning_rate": 1.4714336832515384e-05, "loss": 0.7334717512130737, "step": 10589 }, { "epoch": 12.993865030674847, "grad_norm": 0.29938727617263794, "learning_rate": 1.4709762361908202e-05, "loss": 0.6784722805023193, "step": 10590 }, { "epoch": 12.995092024539877, "grad_norm": 0.2839692533016205, "learning_rate": 1.4705188306071154e-05, "loss": 0.6156958341598511, "step": 10591 }, { "epoch": 12.996319018404908, "grad_norm": 0.2309553027153015, "learning_rate": 1.4700614665188605e-05, "loss": 0.4468681216239929, "step": 10592 }, { "epoch": 12.99754601226994, "grad_norm": 0.2519240379333496, "learning_rate": 1.4696041439444901e-05, "loss": 0.5115182399749756, "step": 10593 }, { "epoch": 12.99877300613497, "grad_norm": 0.22829023003578186, "learning_rate": 1.4691468629024386e-05, "loss": 0.6700824499130249, "step": 10594 }, { "epoch": 13.0, "grad_norm": 0.27528566122055054, "learning_rate": 1.4686896234111357e-05, "loss": 0.4865180253982544, "step": 10595 }, { "epoch": 13.00122699386503, "grad_norm": 0.25425586104393005, "learning_rate": 1.4682324254890134e-05, "loss": 0.7222822308540344, "step": 10596 }, { "epoch": 13.002453987730062, "grad_norm": 0.2769336998462677, "learning_rate": 1.4677752691544999e-05, "loss": 0.6434780359268188, "step": 10597 }, { "epoch": 13.003680981595092, "grad_norm": 0.2149849683046341, "learning_rate": 1.4673181544260204e-05, "loss": 0.7207925319671631, "step": 10598 }, { "epoch": 13.004907975460123, "grad_norm": 0.21296656131744385, "learning_rate": 1.4668610813220013e-05, "loss": 0.5685372948646545, "step": 10599 }, { "epoch": 13.006134969325153, "grad_norm": 0.24017426371574402, "learning_rate": 1.466404049860864e-05, "loss": 0.6553913354873657, "step": 10600 }, { "epoch": 13.007361963190185, "grad_norm": 0.2307741641998291, "learning_rate": 1.4659470600610331e-05, "loss": 0.5554091930389404, "step": 10601 }, { "epoch": 13.008588957055215, "grad_norm": 0.27560681104660034, "learning_rate": 1.4654901119409264e-05, "loss": 0.695292055606842, "step": 10602 }, { "epoch": 13.009815950920245, "grad_norm": 0.2671225070953369, "learning_rate": 1.4650332055189636e-05, "loss": 0.6648776531219482, "step": 10603 }, { "epoch": 13.011042944785276, "grad_norm": 0.31796348094940186, "learning_rate": 1.4645763408135595e-05, "loss": 0.6017695665359497, "step": 10604 }, { "epoch": 13.012269938650308, "grad_norm": 0.2661946415901184, "learning_rate": 1.4641195178431303e-05, "loss": 0.7306814789772034, "step": 10605 }, { "epoch": 13.013496932515338, "grad_norm": 0.2436073124408722, "learning_rate": 1.463662736626089e-05, "loss": 0.5157918930053711, "step": 10606 }, { "epoch": 13.014723926380368, "grad_norm": 0.24592600762844086, "learning_rate": 1.463205997180847e-05, "loss": 0.6760247349739075, "step": 10607 }, { "epoch": 13.015950920245398, "grad_norm": 0.22153069078922272, "learning_rate": 1.4627492995258146e-05, "loss": 0.50501549243927, "step": 10608 }, { "epoch": 13.01717791411043, "grad_norm": 0.23436930775642395, "learning_rate": 1.4622926436793993e-05, "loss": 0.9162572622299194, "step": 10609 }, { "epoch": 13.01840490797546, "grad_norm": 0.2831934690475464, "learning_rate": 1.4618360296600075e-05, "loss": 0.6598699688911438, "step": 10610 }, { "epoch": 13.01963190184049, "grad_norm": 0.2433818131685257, "learning_rate": 1.461379457486045e-05, "loss": 0.6280831694602966, "step": 10611 }, { "epoch": 13.020858895705521, "grad_norm": 0.27264469861984253, "learning_rate": 1.4609229271759139e-05, "loss": 0.5393368601799011, "step": 10612 }, { "epoch": 13.022085889570553, "grad_norm": 0.2860180735588074, "learning_rate": 1.4604664387480162e-05, "loss": 0.525719940662384, "step": 10613 }, { "epoch": 13.023312883435583, "grad_norm": 0.2718070149421692, "learning_rate": 1.4600099922207521e-05, "loss": 0.5326890349388123, "step": 10614 }, { "epoch": 13.024539877300613, "grad_norm": 0.20924609899520874, "learning_rate": 1.4595535876125188e-05, "loss": 0.4385666251182556, "step": 10615 }, { "epoch": 13.025766871165644, "grad_norm": 0.24154162406921387, "learning_rate": 1.4590972249417123e-05, "loss": 0.6505125164985657, "step": 10616 }, { "epoch": 13.026993865030676, "grad_norm": 0.2651282548904419, "learning_rate": 1.4586409042267286e-05, "loss": 0.4118477702140808, "step": 10617 }, { "epoch": 13.028220858895706, "grad_norm": 0.2843568027019501, "learning_rate": 1.4581846254859594e-05, "loss": 0.6599227786064148, "step": 10618 }, { "epoch": 13.029447852760736, "grad_norm": 0.313306599855423, "learning_rate": 1.4577283887377965e-05, "loss": 0.59212726354599, "step": 10619 }, { "epoch": 13.030674846625766, "grad_norm": 0.2701837420463562, "learning_rate": 1.4572721940006308e-05, "loss": 0.6197932958602905, "step": 10620 }, { "epoch": 13.031901840490798, "grad_norm": 0.2546810209751129, "learning_rate": 1.4568160412928483e-05, "loss": 0.7446179389953613, "step": 10621 }, { "epoch": 13.033128834355828, "grad_norm": 0.2710573375225067, "learning_rate": 1.4563599306328347e-05, "loss": 0.5789084434509277, "step": 10622 }, { "epoch": 13.034355828220859, "grad_norm": 0.27306851744651794, "learning_rate": 1.4559038620389765e-05, "loss": 0.2116387039422989, "step": 10623 }, { "epoch": 13.035582822085889, "grad_norm": 0.24273334443569183, "learning_rate": 1.455447835529657e-05, "loss": 0.46911028027534485, "step": 10624 }, { "epoch": 13.036809815950921, "grad_norm": 0.2398601472377777, "learning_rate": 1.454991851123255e-05, "loss": 0.44024306535720825, "step": 10625 }, { "epoch": 13.038036809815951, "grad_norm": 0.30972200632095337, "learning_rate": 1.454535908838151e-05, "loss": 0.7155049443244934, "step": 10626 }, { "epoch": 13.039263803680981, "grad_norm": 0.2491135150194168, "learning_rate": 1.4540800086927229e-05, "loss": 0.7525806427001953, "step": 10627 }, { "epoch": 13.040490797546012, "grad_norm": 0.287769079208374, "learning_rate": 1.4536241507053467e-05, "loss": 0.6703393459320068, "step": 10628 }, { "epoch": 13.041717791411044, "grad_norm": 0.24758656322956085, "learning_rate": 1.4531683348943973e-05, "loss": 0.5662767887115479, "step": 10629 }, { "epoch": 13.042944785276074, "grad_norm": 0.26806366443634033, "learning_rate": 1.4527125612782457e-05, "loss": 0.7992678880691528, "step": 10630 }, { "epoch": 13.044171779141104, "grad_norm": 0.281910240650177, "learning_rate": 1.452256829875264e-05, "loss": 0.8380658626556396, "step": 10631 }, { "epoch": 13.045398773006134, "grad_norm": 0.2727545201778412, "learning_rate": 1.4518011407038212e-05, "loss": 0.5557678937911987, "step": 10632 }, { "epoch": 13.046625766871166, "grad_norm": 0.2539604604244232, "learning_rate": 1.4513454937822846e-05, "loss": 0.7226741313934326, "step": 10633 }, { "epoch": 13.047852760736196, "grad_norm": 0.26231783628463745, "learning_rate": 1.4508898891290218e-05, "loss": 0.6018736362457275, "step": 10634 }, { "epoch": 13.049079754601227, "grad_norm": 0.23124390840530396, "learning_rate": 1.4504343267623938e-05, "loss": 0.47928521037101746, "step": 10635 }, { "epoch": 13.050306748466257, "grad_norm": 0.2844214141368866, "learning_rate": 1.4499788067007652e-05, "loss": 0.6079316139221191, "step": 10636 }, { "epoch": 13.051533742331289, "grad_norm": 0.27255764603614807, "learning_rate": 1.449523328962496e-05, "loss": 0.6620776653289795, "step": 10637 }, { "epoch": 13.05276073619632, "grad_norm": 0.2652871608734131, "learning_rate": 1.4490678935659451e-05, "loss": 0.4327009916305542, "step": 10638 }, { "epoch": 13.05398773006135, "grad_norm": 0.28121650218963623, "learning_rate": 1.448612500529471e-05, "loss": 0.6556864976882935, "step": 10639 }, { "epoch": 13.05521472392638, "grad_norm": 0.254803866147995, "learning_rate": 1.4481571498714278e-05, "loss": 0.7081599235534668, "step": 10640 }, { "epoch": 13.056441717791412, "grad_norm": 0.20802877843379974, "learning_rate": 1.4477018416101696e-05, "loss": 0.4224048852920532, "step": 10641 }, { "epoch": 13.057668711656442, "grad_norm": 0.2115013748407364, "learning_rate": 1.4472465757640492e-05, "loss": 0.5545357465744019, "step": 10642 }, { "epoch": 13.058895705521472, "grad_norm": 0.26979583501815796, "learning_rate": 1.4467913523514164e-05, "loss": 0.6711087226867676, "step": 10643 }, { "epoch": 13.060122699386502, "grad_norm": 0.27660393714904785, "learning_rate": 1.4463361713906215e-05, "loss": 0.6082174181938171, "step": 10644 }, { "epoch": 13.061349693251534, "grad_norm": 0.22234496474266052, "learning_rate": 1.4458810329000084e-05, "loss": 0.6722216606140137, "step": 10645 }, { "epoch": 13.062576687116565, "grad_norm": 0.2856068015098572, "learning_rate": 1.4454259368979255e-05, "loss": 0.6451151967048645, "step": 10646 }, { "epoch": 13.063803680981595, "grad_norm": 0.2894643545150757, "learning_rate": 1.4449708834027153e-05, "loss": 0.8290522694587708, "step": 10647 }, { "epoch": 13.065030674846625, "grad_norm": 0.28655973076820374, "learning_rate": 1.4445158724327206e-05, "loss": 0.6677722930908203, "step": 10648 }, { "epoch": 13.066257668711657, "grad_norm": 0.25659751892089844, "learning_rate": 1.4440609040062803e-05, "loss": 0.6267755627632141, "step": 10649 }, { "epoch": 13.067484662576687, "grad_norm": 0.25616884231567383, "learning_rate": 1.4436059781417327e-05, "loss": 0.6621798872947693, "step": 10650 }, { "epoch": 13.068711656441717, "grad_norm": 0.22659537196159363, "learning_rate": 1.4431510948574154e-05, "loss": 0.5570310354232788, "step": 10651 }, { "epoch": 13.069938650306748, "grad_norm": 0.2755650579929352, "learning_rate": 1.4426962541716632e-05, "loss": 0.6682619452476501, "step": 10652 }, { "epoch": 13.07116564417178, "grad_norm": 0.2755105793476105, "learning_rate": 1.4422414561028105e-05, "loss": 0.7296369075775146, "step": 10653 }, { "epoch": 13.07239263803681, "grad_norm": 0.26487797498703003, "learning_rate": 1.4417867006691871e-05, "loss": 0.5974353551864624, "step": 10654 }, { "epoch": 13.07361963190184, "grad_norm": 0.22717827558517456, "learning_rate": 1.4413319878891235e-05, "loss": 0.5937856435775757, "step": 10655 }, { "epoch": 13.07484662576687, "grad_norm": 0.2626100778579712, "learning_rate": 1.440877317780948e-05, "loss": 0.4904773235321045, "step": 10656 }, { "epoch": 13.076073619631902, "grad_norm": 0.30661067366600037, "learning_rate": 1.4404226903629869e-05, "loss": 0.6874955892562866, "step": 10657 }, { "epoch": 13.077300613496933, "grad_norm": 0.3918216824531555, "learning_rate": 1.4399681056535663e-05, "loss": 0.6843768954277039, "step": 10658 }, { "epoch": 13.078527607361963, "grad_norm": 0.2659836709499359, "learning_rate": 1.4395135636710072e-05, "loss": 0.6320899724960327, "step": 10659 }, { "epoch": 13.079754601226995, "grad_norm": 0.30075711011886597, "learning_rate": 1.4390590644336318e-05, "loss": 0.6519305109977722, "step": 10660 }, { "epoch": 13.080981595092025, "grad_norm": 0.2827310264110565, "learning_rate": 1.4386046079597595e-05, "loss": 0.5914794206619263, "step": 10661 }, { "epoch": 13.082208588957055, "grad_norm": 0.2406134456396103, "learning_rate": 1.4381501942677084e-05, "loss": 0.742314338684082, "step": 10662 }, { "epoch": 13.083435582822085, "grad_norm": 0.2979927361011505, "learning_rate": 1.4376958233757954e-05, "loss": 0.42999064922332764, "step": 10663 }, { "epoch": 13.084662576687117, "grad_norm": 0.2435983568429947, "learning_rate": 1.4372414953023328e-05, "loss": 0.5138782262802124, "step": 10664 }, { "epoch": 13.085889570552148, "grad_norm": 0.23843789100646973, "learning_rate": 1.4367872100656344e-05, "loss": 0.7006345391273499, "step": 10665 }, { "epoch": 13.087116564417178, "grad_norm": 0.29993563890457153, "learning_rate": 1.4363329676840114e-05, "loss": 0.6285065412521362, "step": 10666 }, { "epoch": 13.088343558282208, "grad_norm": 0.2565472424030304, "learning_rate": 1.4358787681757729e-05, "loss": 0.6598167419433594, "step": 10667 }, { "epoch": 13.08957055214724, "grad_norm": 0.3225134313106537, "learning_rate": 1.435424611559226e-05, "loss": 0.6003702878952026, "step": 10668 }, { "epoch": 13.09079754601227, "grad_norm": 0.3142983317375183, "learning_rate": 1.4349704978526767e-05, "loss": 0.5284070372581482, "step": 10669 }, { "epoch": 13.0920245398773, "grad_norm": 0.28437909483909607, "learning_rate": 1.4345164270744294e-05, "loss": 0.5965099334716797, "step": 10670 }, { "epoch": 13.09325153374233, "grad_norm": 0.2577437460422516, "learning_rate": 1.434062399242786e-05, "loss": 0.7153952717781067, "step": 10671 }, { "epoch": 13.094478527607363, "grad_norm": 0.28497329354286194, "learning_rate": 1.4336084143760481e-05, "loss": 0.5272413492202759, "step": 10672 }, { "epoch": 13.095705521472393, "grad_norm": 0.2638389766216278, "learning_rate": 1.4331544724925122e-05, "loss": 0.5313028693199158, "step": 10673 }, { "epoch": 13.096932515337423, "grad_norm": 0.26251888275146484, "learning_rate": 1.4327005736104773e-05, "loss": 0.6223304271697998, "step": 10674 }, { "epoch": 13.098159509202453, "grad_norm": 0.2582906484603882, "learning_rate": 1.432246717748238e-05, "loss": 0.5525506734848022, "step": 10675 }, { "epoch": 13.099386503067485, "grad_norm": 0.251327782869339, "learning_rate": 1.431792904924088e-05, "loss": 0.8080987930297852, "step": 10676 }, { "epoch": 13.100613496932516, "grad_norm": 0.2156551331281662, "learning_rate": 1.431339135156321e-05, "loss": 0.6400494575500488, "step": 10677 }, { "epoch": 13.101840490797546, "grad_norm": 0.2985442578792572, "learning_rate": 1.430885408463224e-05, "loss": 0.6740592122077942, "step": 10678 }, { "epoch": 13.103067484662576, "grad_norm": 0.29636529088020325, "learning_rate": 1.4304317248630871e-05, "loss": 0.4457250237464905, "step": 10679 }, { "epoch": 13.104294478527608, "grad_norm": 0.22825996577739716, "learning_rate": 1.4299780843741967e-05, "loss": 0.35471075773239136, "step": 10680 }, { "epoch": 13.105521472392638, "grad_norm": 0.26648008823394775, "learning_rate": 1.4295244870148384e-05, "loss": 0.6038479804992676, "step": 10681 }, { "epoch": 13.106748466257669, "grad_norm": 0.4024699330329895, "learning_rate": 1.4290709328032955e-05, "loss": 0.5234675407409668, "step": 10682 }, { "epoch": 13.107975460122699, "grad_norm": 0.26186689734458923, "learning_rate": 1.4286174217578479e-05, "loss": 0.6966535449028015, "step": 10683 }, { "epoch": 13.10920245398773, "grad_norm": 0.3174460232257843, "learning_rate": 1.4281639538967766e-05, "loss": 0.550309419631958, "step": 10684 }, { "epoch": 13.110429447852761, "grad_norm": 0.2887771725654602, "learning_rate": 1.4277105292383594e-05, "loss": 0.7613444924354553, "step": 10685 }, { "epoch": 13.111656441717791, "grad_norm": 0.252990186214447, "learning_rate": 1.4272571478008722e-05, "loss": 0.6385270953178406, "step": 10686 }, { "epoch": 13.112883435582821, "grad_norm": 0.2630611062049866, "learning_rate": 1.426803809602591e-05, "loss": 0.7512617707252502, "step": 10687 }, { "epoch": 13.114110429447853, "grad_norm": 0.2200251966714859, "learning_rate": 1.4263505146617865e-05, "loss": 0.5335235595703125, "step": 10688 }, { "epoch": 13.115337423312884, "grad_norm": 0.2604767978191376, "learning_rate": 1.4258972629967294e-05, "loss": 0.6401417851448059, "step": 10689 }, { "epoch": 13.116564417177914, "grad_norm": 0.29491913318634033, "learning_rate": 1.4254440546256914e-05, "loss": 0.5692827105522156, "step": 10690 }, { "epoch": 13.117791411042944, "grad_norm": 0.26513832807540894, "learning_rate": 1.4249908895669395e-05, "loss": 0.5506641864776611, "step": 10691 }, { "epoch": 13.119018404907976, "grad_norm": 0.2570270597934723, "learning_rate": 1.4245377678387378e-05, "loss": 0.6058310270309448, "step": 10692 }, { "epoch": 13.120245398773006, "grad_norm": 0.24755889177322388, "learning_rate": 1.4240846894593519e-05, "loss": 0.5907089114189148, "step": 10693 }, { "epoch": 13.121472392638037, "grad_norm": 0.2614697217941284, "learning_rate": 1.4236316544470429e-05, "loss": 0.6875295042991638, "step": 10694 }, { "epoch": 13.122699386503067, "grad_norm": 0.278471440076828, "learning_rate": 1.4231786628200727e-05, "loss": 0.732495129108429, "step": 10695 }, { "epoch": 13.123926380368099, "grad_norm": 0.298864483833313, "learning_rate": 1.4227257145966988e-05, "loss": 0.6710281372070312, "step": 10696 }, { "epoch": 13.125153374233129, "grad_norm": 0.26570501923561096, "learning_rate": 1.4222728097951803e-05, "loss": 0.62288898229599, "step": 10697 }, { "epoch": 13.12638036809816, "grad_norm": 0.25938957929611206, "learning_rate": 1.4218199484337696e-05, "loss": 0.37789401412010193, "step": 10698 }, { "epoch": 13.12760736196319, "grad_norm": 0.28553006052970886, "learning_rate": 1.4213671305307222e-05, "loss": 0.5718552470207214, "step": 10699 }, { "epoch": 13.128834355828221, "grad_norm": 0.2861769497394562, "learning_rate": 1.4209143561042893e-05, "loss": 0.6864010095596313, "step": 10700 }, { "epoch": 13.130061349693252, "grad_norm": 0.2572595775127411, "learning_rate": 1.420461625172721e-05, "loss": 0.48754221200942993, "step": 10701 }, { "epoch": 13.131288343558282, "grad_norm": 0.3144965171813965, "learning_rate": 1.4200089377542664e-05, "loss": 0.8323603868484497, "step": 10702 }, { "epoch": 13.132515337423312, "grad_norm": 0.3179311454296112, "learning_rate": 1.4195562938671705e-05, "loss": 0.7056158781051636, "step": 10703 }, { "epoch": 13.133742331288344, "grad_norm": 0.26311686635017395, "learning_rate": 1.419103693529679e-05, "loss": 0.6663171052932739, "step": 10704 }, { "epoch": 13.134969325153374, "grad_norm": 0.2815898656845093, "learning_rate": 1.4186511367600346e-05, "loss": 0.5549563765525818, "step": 10705 }, { "epoch": 13.136196319018405, "grad_norm": 0.24868787825107574, "learning_rate": 1.4181986235764788e-05, "loss": 0.5231446027755737, "step": 10706 }, { "epoch": 13.137423312883435, "grad_norm": 0.28203609585762024, "learning_rate": 1.4177461539972519e-05, "loss": 0.6570352911949158, "step": 10707 }, { "epoch": 13.138650306748467, "grad_norm": 0.28678378462791443, "learning_rate": 1.41729372804059e-05, "loss": 0.7342174053192139, "step": 10708 }, { "epoch": 13.139877300613497, "grad_norm": 0.30424660444259644, "learning_rate": 1.4168413457247305e-05, "loss": 0.4418182373046875, "step": 10709 }, { "epoch": 13.141104294478527, "grad_norm": 0.2805635929107666, "learning_rate": 1.4163890070679064e-05, "loss": 0.6599897146224976, "step": 10710 }, { "epoch": 13.142331288343557, "grad_norm": 0.2888251543045044, "learning_rate": 1.415936712088351e-05, "loss": 0.6000779867172241, "step": 10711 }, { "epoch": 13.14355828220859, "grad_norm": 0.2511034607887268, "learning_rate": 1.415484460804295e-05, "loss": 0.6201218366622925, "step": 10712 }, { "epoch": 13.14478527607362, "grad_norm": 0.30562248826026917, "learning_rate": 1.4150322532339671e-05, "loss": 0.6307284832000732, "step": 10713 }, { "epoch": 13.14601226993865, "grad_norm": 0.2705465257167816, "learning_rate": 1.4145800893955951e-05, "loss": 0.7167829275131226, "step": 10714 }, { "epoch": 13.14723926380368, "grad_norm": 0.24372117221355438, "learning_rate": 1.4141279693074035e-05, "loss": 0.506740391254425, "step": 10715 }, { "epoch": 13.148466257668712, "grad_norm": 0.24838906526565552, "learning_rate": 1.4136758929876176e-05, "loss": 0.6891814470291138, "step": 10716 }, { "epoch": 13.149693251533742, "grad_norm": 0.2499724179506302, "learning_rate": 1.4132238604544573e-05, "loss": 0.609915018081665, "step": 10717 }, { "epoch": 13.150920245398773, "grad_norm": 0.23767299950122833, "learning_rate": 1.4127718717261434e-05, "loss": 0.5810264348983765, "step": 10718 }, { "epoch": 13.152147239263805, "grad_norm": 0.2908426821231842, "learning_rate": 1.4123199268208947e-05, "loss": 0.6001471281051636, "step": 10719 }, { "epoch": 13.153374233128835, "grad_norm": 0.22210034728050232, "learning_rate": 1.4118680257569273e-05, "loss": 0.4353834390640259, "step": 10720 }, { "epoch": 13.154601226993865, "grad_norm": 0.2513978183269501, "learning_rate": 1.4114161685524574e-05, "loss": 0.6993585824966431, "step": 10721 }, { "epoch": 13.155828220858895, "grad_norm": 0.2557969391345978, "learning_rate": 1.4109643552256963e-05, "loss": 0.6630812883377075, "step": 10722 }, { "epoch": 13.157055214723927, "grad_norm": 0.2801971137523651, "learning_rate": 1.4105125857948556e-05, "loss": 0.6617208123207092, "step": 10723 }, { "epoch": 13.158282208588957, "grad_norm": 0.3134906589984894, "learning_rate": 1.4100608602781456e-05, "loss": 0.6646884679794312, "step": 10724 }, { "epoch": 13.159509202453988, "grad_norm": 0.25577038526535034, "learning_rate": 1.4096091786937734e-05, "loss": 0.505920946598053, "step": 10725 }, { "epoch": 13.160736196319018, "grad_norm": 0.2535880506038666, "learning_rate": 1.4091575410599468e-05, "loss": 0.5772415399551392, "step": 10726 }, { "epoch": 13.16196319018405, "grad_norm": 0.24002225697040558, "learning_rate": 1.4087059473948668e-05, "loss": 0.5268837809562683, "step": 10727 }, { "epoch": 13.16319018404908, "grad_norm": 0.3016403019428253, "learning_rate": 1.4082543977167381e-05, "loss": 0.5402105450630188, "step": 10728 }, { "epoch": 13.16441717791411, "grad_norm": 0.24895808100700378, "learning_rate": 1.4078028920437608e-05, "loss": 0.7674675583839417, "step": 10729 }, { "epoch": 13.16564417177914, "grad_norm": 0.2731612026691437, "learning_rate": 1.4073514303941337e-05, "loss": 0.4728630781173706, "step": 10730 }, { "epoch": 13.166871165644173, "grad_norm": 0.2825586795806885, "learning_rate": 1.4069000127860554e-05, "loss": 0.6291763186454773, "step": 10731 }, { "epoch": 13.168098159509203, "grad_norm": 0.24334393441677094, "learning_rate": 1.406448639237719e-05, "loss": 0.5636469721794128, "step": 10732 }, { "epoch": 13.169325153374233, "grad_norm": 0.32378441095352173, "learning_rate": 1.4059973097673187e-05, "loss": 0.5134554505348206, "step": 10733 }, { "epoch": 13.170552147239263, "grad_norm": 0.3031603991985321, "learning_rate": 1.4055460243930457e-05, "loss": 0.7320792078971863, "step": 10734 }, { "epoch": 13.171779141104295, "grad_norm": 0.2726536691188812, "learning_rate": 1.4050947831330935e-05, "loss": 0.6982824206352234, "step": 10735 }, { "epoch": 13.173006134969325, "grad_norm": 0.3046509325504303, "learning_rate": 1.4046435860056462e-05, "loss": 0.48068320751190186, "step": 10736 }, { "epoch": 13.174233128834356, "grad_norm": 0.24987655878067017, "learning_rate": 1.4041924330288921e-05, "loss": 0.5872737169265747, "step": 10737 }, { "epoch": 13.175460122699386, "grad_norm": 0.2851486802101135, "learning_rate": 1.4037413242210157e-05, "loss": 0.6372517347335815, "step": 10738 }, { "epoch": 13.176687116564418, "grad_norm": 0.2857123017311096, "learning_rate": 1.4032902596002e-05, "loss": 0.5375373363494873, "step": 10739 }, { "epoch": 13.177914110429448, "grad_norm": 0.2621070146560669, "learning_rate": 1.4028392391846268e-05, "loss": 0.5365416407585144, "step": 10740 }, { "epoch": 13.179141104294478, "grad_norm": 0.2581162750720978, "learning_rate": 1.4023882629924739e-05, "loss": 0.7161093950271606, "step": 10741 }, { "epoch": 13.180368098159509, "grad_norm": 0.27517491579055786, "learning_rate": 1.4019373310419196e-05, "loss": 0.6020679473876953, "step": 10742 }, { "epoch": 13.18159509202454, "grad_norm": 0.2251732498407364, "learning_rate": 1.4014864433511396e-05, "loss": 0.6828919649124146, "step": 10743 }, { "epoch": 13.18282208588957, "grad_norm": 0.2597944438457489, "learning_rate": 1.4010355999383079e-05, "loss": 0.8244296312332153, "step": 10744 }, { "epoch": 13.184049079754601, "grad_norm": 0.3383994698524475, "learning_rate": 1.4005848008215983e-05, "loss": 0.28941985964775085, "step": 10745 }, { "epoch": 13.185276073619631, "grad_norm": 0.28890207409858704, "learning_rate": 1.4001340460191781e-05, "loss": 0.5507364273071289, "step": 10746 }, { "epoch": 13.186503067484663, "grad_norm": 0.3332202434539795, "learning_rate": 1.3996833355492179e-05, "loss": 0.37686997652053833, "step": 10747 }, { "epoch": 13.187730061349694, "grad_norm": 0.24644753336906433, "learning_rate": 1.3992326694298844e-05, "loss": 0.7179627418518066, "step": 10748 }, { "epoch": 13.188957055214724, "grad_norm": 0.3082149624824524, "learning_rate": 1.3987820476793423e-05, "loss": 0.5020637512207031, "step": 10749 }, { "epoch": 13.190184049079754, "grad_norm": 0.3600420355796814, "learning_rate": 1.3983314703157563e-05, "loss": 0.6359543800354004, "step": 10750 }, { "epoch": 13.191411042944786, "grad_norm": 0.3253253102302551, "learning_rate": 1.3978809373572855e-05, "loss": 0.5605628490447998, "step": 10751 }, { "epoch": 13.192638036809816, "grad_norm": 0.25609004497528076, "learning_rate": 1.3974304488220907e-05, "loss": 0.6673272252082825, "step": 10752 }, { "epoch": 13.193865030674846, "grad_norm": 0.2578868269920349, "learning_rate": 1.39698000472833e-05, "loss": 0.521515429019928, "step": 10753 }, { "epoch": 13.195092024539877, "grad_norm": 0.2818772792816162, "learning_rate": 1.3965296050941592e-05, "loss": 0.7213131785392761, "step": 10754 }, { "epoch": 13.196319018404909, "grad_norm": 0.28933417797088623, "learning_rate": 1.3960792499377339e-05, "loss": 0.6024169921875, "step": 10755 }, { "epoch": 13.197546012269939, "grad_norm": 0.2771787643432617, "learning_rate": 1.3956289392772038e-05, "loss": 0.5456565022468567, "step": 10756 }, { "epoch": 13.198773006134969, "grad_norm": 0.2938121259212494, "learning_rate": 1.3951786731307222e-05, "loss": 0.4674414098262787, "step": 10757 }, { "epoch": 13.2, "grad_norm": 0.29655614495277405, "learning_rate": 1.3947284515164372e-05, "loss": 0.5558976531028748, "step": 10758 }, { "epoch": 13.201226993865031, "grad_norm": 0.27961158752441406, "learning_rate": 1.3942782744524973e-05, "loss": 0.6379793882369995, "step": 10759 }, { "epoch": 13.202453987730062, "grad_norm": 0.24919883906841278, "learning_rate": 1.3938281419570453e-05, "loss": 0.7872443199157715, "step": 10760 }, { "epoch": 13.203680981595092, "grad_norm": 0.2794681489467621, "learning_rate": 1.3933780540482261e-05, "loss": 0.49666187167167664, "step": 10761 }, { "epoch": 13.204907975460122, "grad_norm": 0.23396393656730652, "learning_rate": 1.392928010744181e-05, "loss": 0.49848517775535583, "step": 10762 }, { "epoch": 13.206134969325154, "grad_norm": 0.28280243277549744, "learning_rate": 1.3924780120630507e-05, "loss": 0.40018776059150696, "step": 10763 }, { "epoch": 13.207361963190184, "grad_norm": 0.24904781579971313, "learning_rate": 1.3920280580229739e-05, "loss": 0.4946937561035156, "step": 10764 }, { "epoch": 13.208588957055214, "grad_norm": 0.3110698461532593, "learning_rate": 1.3915781486420848e-05, "loss": 0.47809934616088867, "step": 10765 }, { "epoch": 13.209815950920245, "grad_norm": 0.2579328715801239, "learning_rate": 1.3911282839385194e-05, "loss": 0.48188352584838867, "step": 10766 }, { "epoch": 13.211042944785277, "grad_norm": 0.2311112880706787, "learning_rate": 1.3906784639304104e-05, "loss": 0.5116344690322876, "step": 10767 }, { "epoch": 13.212269938650307, "grad_norm": 0.25514495372772217, "learning_rate": 1.3902286886358884e-05, "loss": 0.5458927750587463, "step": 10768 }, { "epoch": 13.213496932515337, "grad_norm": 0.2565062344074249, "learning_rate": 1.3897789580730841e-05, "loss": 0.7279469966888428, "step": 10769 }, { "epoch": 13.214723926380367, "grad_norm": 0.25615495443344116, "learning_rate": 1.3893292722601223e-05, "loss": 0.4450657069683075, "step": 10770 }, { "epoch": 13.2159509202454, "grad_norm": 0.3963329792022705, "learning_rate": 1.3888796312151298e-05, "loss": 0.4551332890987396, "step": 10771 }, { "epoch": 13.21717791411043, "grad_norm": 0.255778431892395, "learning_rate": 1.3884300349562307e-05, "loss": 0.5000512003898621, "step": 10772 }, { "epoch": 13.21840490797546, "grad_norm": 0.31311455368995667, "learning_rate": 1.3879804835015462e-05, "loss": 0.4918065667152405, "step": 10773 }, { "epoch": 13.21963190184049, "grad_norm": 0.2553231716156006, "learning_rate": 1.387530976869198e-05, "loss": 0.7132975459098816, "step": 10774 }, { "epoch": 13.220858895705522, "grad_norm": 0.2904861569404602, "learning_rate": 1.387081515077302e-05, "loss": 0.6665104627609253, "step": 10775 }, { "epoch": 13.222085889570552, "grad_norm": 0.26538795232772827, "learning_rate": 1.3866320981439762e-05, "loss": 0.6604849696159363, "step": 10776 }, { "epoch": 13.223312883435582, "grad_norm": 0.24089424312114716, "learning_rate": 1.386182726087335e-05, "loss": 0.5441534519195557, "step": 10777 }, { "epoch": 13.224539877300613, "grad_norm": 0.2698567509651184, "learning_rate": 1.3857333989254918e-05, "loss": 0.45848149061203003, "step": 10778 }, { "epoch": 13.225766871165645, "grad_norm": 0.25496429204940796, "learning_rate": 1.3852841166765567e-05, "loss": 0.7272292375564575, "step": 10779 }, { "epoch": 13.226993865030675, "grad_norm": 0.3382684886455536, "learning_rate": 1.3848348793586402e-05, "loss": 0.5234512090682983, "step": 10780 }, { "epoch": 13.228220858895705, "grad_norm": 0.28185218572616577, "learning_rate": 1.3843856869898486e-05, "loss": 0.49198973178863525, "step": 10781 }, { "epoch": 13.229447852760735, "grad_norm": 0.29378020763397217, "learning_rate": 1.3839365395882887e-05, "loss": 0.8905601501464844, "step": 10782 }, { "epoch": 13.230674846625767, "grad_norm": 0.2815845310688019, "learning_rate": 1.383487437172064e-05, "loss": 0.6817034482955933, "step": 10783 }, { "epoch": 13.231901840490798, "grad_norm": 0.25573018193244934, "learning_rate": 1.3830383797592761e-05, "loss": 0.4105250835418701, "step": 10784 }, { "epoch": 13.233128834355828, "grad_norm": 0.24545927345752716, "learning_rate": 1.3825893673680252e-05, "loss": 0.311328262090683, "step": 10785 }, { "epoch": 13.23435582822086, "grad_norm": 0.26870253682136536, "learning_rate": 1.3821404000164096e-05, "loss": 0.5639820098876953, "step": 10786 }, { "epoch": 13.23558282208589, "grad_norm": 0.25783827900886536, "learning_rate": 1.3816914777225265e-05, "loss": 0.37970465421676636, "step": 10787 }, { "epoch": 13.23680981595092, "grad_norm": 0.28377535939216614, "learning_rate": 1.3812426005044716e-05, "loss": 0.5290566682815552, "step": 10788 }, { "epoch": 13.23803680981595, "grad_norm": 0.22558404505252838, "learning_rate": 1.380793768380335e-05, "loss": 0.48246437311172485, "step": 10789 }, { "epoch": 13.239263803680982, "grad_norm": 0.27353888750076294, "learning_rate": 1.3803449813682104e-05, "loss": 0.7985268235206604, "step": 10790 }, { "epoch": 13.240490797546013, "grad_norm": 0.3324229121208191, "learning_rate": 1.3798962394861858e-05, "loss": 0.5519530773162842, "step": 10791 }, { "epoch": 13.241717791411043, "grad_norm": 0.2966952919960022, "learning_rate": 1.379447542752349e-05, "loss": 0.6373850703239441, "step": 10792 }, { "epoch": 13.242944785276073, "grad_norm": 0.2527600824832916, "learning_rate": 1.3789988911847862e-05, "loss": 0.7481285333633423, "step": 10793 }, { "epoch": 13.244171779141105, "grad_norm": 0.24240261316299438, "learning_rate": 1.3785502848015818e-05, "loss": 0.5806789398193359, "step": 10794 }, { "epoch": 13.245398773006135, "grad_norm": 0.3250002861022949, "learning_rate": 1.378101723620816e-05, "loss": 0.3953886330127716, "step": 10795 }, { "epoch": 13.246625766871166, "grad_norm": 0.3460005521774292, "learning_rate": 1.3776532076605697e-05, "loss": 0.6574654579162598, "step": 10796 }, { "epoch": 13.247852760736196, "grad_norm": 0.3203367590904236, "learning_rate": 1.3772047369389218e-05, "loss": 0.7003446817398071, "step": 10797 }, { "epoch": 13.249079754601228, "grad_norm": 0.25454506278038025, "learning_rate": 1.3767563114739485e-05, "loss": 0.588441014289856, "step": 10798 }, { "epoch": 13.250306748466258, "grad_norm": 0.25759997963905334, "learning_rate": 1.3763079312837256e-05, "loss": 0.6611709594726562, "step": 10799 }, { "epoch": 13.251533742331288, "grad_norm": 0.24998541176319122, "learning_rate": 1.3758595963863236e-05, "loss": 0.6551293730735779, "step": 10800 }, { "epoch": 13.252760736196318, "grad_norm": 0.29971620440483093, "learning_rate": 1.3754113067998157e-05, "loss": 0.5762906670570374, "step": 10801 }, { "epoch": 13.25398773006135, "grad_norm": 0.24897855520248413, "learning_rate": 1.3749630625422704e-05, "loss": 0.6521101593971252, "step": 10802 }, { "epoch": 13.25521472392638, "grad_norm": 0.28084850311279297, "learning_rate": 1.3745148636317567e-05, "loss": 0.6417316198348999, "step": 10803 }, { "epoch": 13.256441717791411, "grad_norm": 0.2346104085445404, "learning_rate": 1.374066710086338e-05, "loss": 0.5437411665916443, "step": 10804 }, { "epoch": 13.257668711656441, "grad_norm": 0.3026069402694702, "learning_rate": 1.3736186019240788e-05, "loss": 0.45428329706192017, "step": 10805 }, { "epoch": 13.258895705521473, "grad_norm": 0.2743406593799591, "learning_rate": 1.3731705391630411e-05, "loss": 0.6804114580154419, "step": 10806 }, { "epoch": 13.260122699386503, "grad_norm": 0.2638004422187805, "learning_rate": 1.3727225218212853e-05, "loss": 0.7363522052764893, "step": 10807 }, { "epoch": 13.261349693251534, "grad_norm": 0.2618870735168457, "learning_rate": 1.3722745499168704e-05, "loss": 0.7028923034667969, "step": 10808 }, { "epoch": 13.262576687116564, "grad_norm": 0.24102073907852173, "learning_rate": 1.3718266234678509e-05, "loss": 0.6984888315200806, "step": 10809 }, { "epoch": 13.263803680981596, "grad_norm": 0.2777457535266876, "learning_rate": 1.3713787424922828e-05, "loss": 0.8080847263336182, "step": 10810 }, { "epoch": 13.265030674846626, "grad_norm": 0.3180634677410126, "learning_rate": 1.3709309070082185e-05, "loss": 0.6435424089431763, "step": 10811 }, { "epoch": 13.266257668711656, "grad_norm": 0.236502543091774, "learning_rate": 1.3704831170337094e-05, "loss": 0.6106126308441162, "step": 10812 }, { "epoch": 13.267484662576686, "grad_norm": 0.23994337022304535, "learning_rate": 1.370035372586805e-05, "loss": 0.6327890157699585, "step": 10813 }, { "epoch": 13.268711656441718, "grad_norm": 0.30027732253074646, "learning_rate": 1.3695876736855513e-05, "loss": 0.44955840706825256, "step": 10814 }, { "epoch": 13.269938650306749, "grad_norm": 0.29011526703834534, "learning_rate": 1.3691400203479942e-05, "loss": 0.7098329067230225, "step": 10815 }, { "epoch": 13.271165644171779, "grad_norm": 0.2494044452905655, "learning_rate": 1.368692412592178e-05, "loss": 0.6006591320037842, "step": 10816 }, { "epoch": 13.27239263803681, "grad_norm": 0.24124225974082947, "learning_rate": 1.3682448504361434e-05, "loss": 0.7488158941268921, "step": 10817 }, { "epoch": 13.273619631901841, "grad_norm": 0.26357609033584595, "learning_rate": 1.3677973338979324e-05, "loss": 0.643414318561554, "step": 10818 }, { "epoch": 13.274846625766871, "grad_norm": 0.2649046778678894, "learning_rate": 1.3673498629955806e-05, "loss": 0.6022815704345703, "step": 10819 }, { "epoch": 13.276073619631902, "grad_norm": 0.2427377849817276, "learning_rate": 1.3669024377471258e-05, "loss": 0.6383501291275024, "step": 10820 }, { "epoch": 13.277300613496932, "grad_norm": 0.2594558894634247, "learning_rate": 1.3664550581706015e-05, "loss": 0.6001957654953003, "step": 10821 }, { "epoch": 13.278527607361964, "grad_norm": 0.34052109718322754, "learning_rate": 1.3660077242840411e-05, "loss": 0.4331021308898926, "step": 10822 }, { "epoch": 13.279754601226994, "grad_norm": 0.2567862868309021, "learning_rate": 1.3655604361054749e-05, "loss": 0.6722515225410461, "step": 10823 }, { "epoch": 13.280981595092024, "grad_norm": 0.27114924788475037, "learning_rate": 1.365113193652932e-05, "loss": 0.6027200222015381, "step": 10824 }, { "epoch": 13.282208588957054, "grad_norm": 0.22635824978351593, "learning_rate": 1.3646659969444398e-05, "loss": 0.5663412809371948, "step": 10825 }, { "epoch": 13.283435582822086, "grad_norm": 0.23446625471115112, "learning_rate": 1.3642188459980232e-05, "loss": 0.5361613035202026, "step": 10826 }, { "epoch": 13.284662576687117, "grad_norm": 0.3173350393772125, "learning_rate": 1.3637717408317063e-05, "loss": 0.6545150279998779, "step": 10827 }, { "epoch": 13.285889570552147, "grad_norm": 0.2559313178062439, "learning_rate": 1.3633246814635087e-05, "loss": 0.7036548852920532, "step": 10828 }, { "epoch": 13.287116564417177, "grad_norm": 0.27130985260009766, "learning_rate": 1.3628776679114517e-05, "loss": 0.7312886118888855, "step": 10829 }, { "epoch": 13.28834355828221, "grad_norm": 0.2373688668012619, "learning_rate": 1.3624307001935526e-05, "loss": 0.4315127432346344, "step": 10830 }, { "epoch": 13.28957055214724, "grad_norm": 0.26444122195243835, "learning_rate": 1.3619837783278274e-05, "loss": 0.6911067962646484, "step": 10831 }, { "epoch": 13.29079754601227, "grad_norm": 0.23141101002693176, "learning_rate": 1.3615369023322915e-05, "loss": 0.3662712574005127, "step": 10832 }, { "epoch": 13.2920245398773, "grad_norm": 0.28089168667793274, "learning_rate": 1.361090072224955e-05, "loss": 0.4867095351219177, "step": 10833 }, { "epoch": 13.293251533742332, "grad_norm": 0.2626861035823822, "learning_rate": 1.3606432880238295e-05, "loss": 0.43592369556427, "step": 10834 }, { "epoch": 13.294478527607362, "grad_norm": 0.298187255859375, "learning_rate": 1.3601965497469235e-05, "loss": 0.6354990601539612, "step": 10835 }, { "epoch": 13.295705521472392, "grad_norm": 0.276712030172348, "learning_rate": 1.3597498574122436e-05, "loss": 0.4728253483772278, "step": 10836 }, { "epoch": 13.296932515337422, "grad_norm": 0.28911063075065613, "learning_rate": 1.3593032110377962e-05, "loss": 0.6629608869552612, "step": 10837 }, { "epoch": 13.298159509202454, "grad_norm": 0.2605682909488678, "learning_rate": 1.3588566106415818e-05, "loss": 0.639316201210022, "step": 10838 }, { "epoch": 13.299386503067485, "grad_norm": 0.26914072036743164, "learning_rate": 1.3584100562416027e-05, "loss": 0.7354975938796997, "step": 10839 }, { "epoch": 13.300613496932515, "grad_norm": 0.24694085121154785, "learning_rate": 1.3579635478558584e-05, "loss": 0.6128354668617249, "step": 10840 }, { "epoch": 13.301840490797545, "grad_norm": 0.2500157058238983, "learning_rate": 1.3575170855023461e-05, "loss": 0.5409916639328003, "step": 10841 }, { "epoch": 13.303067484662577, "grad_norm": 0.24862481653690338, "learning_rate": 1.357070669199063e-05, "loss": 0.6420753002166748, "step": 10842 }, { "epoch": 13.304294478527607, "grad_norm": 0.24033324420452118, "learning_rate": 1.3566242989640001e-05, "loss": 0.7270787954330444, "step": 10843 }, { "epoch": 13.305521472392638, "grad_norm": 0.2682027816772461, "learning_rate": 1.3561779748151509e-05, "loss": 0.6321262717247009, "step": 10844 }, { "epoch": 13.30674846625767, "grad_norm": 0.23037351667881012, "learning_rate": 1.3557316967705042e-05, "loss": 0.6419773101806641, "step": 10845 }, { "epoch": 13.3079754601227, "grad_norm": 0.2571915090084076, "learning_rate": 1.3552854648480514e-05, "loss": 0.512590765953064, "step": 10846 }, { "epoch": 13.30920245398773, "grad_norm": 0.24749282002449036, "learning_rate": 1.3548392790657755e-05, "loss": 0.6641426086425781, "step": 10847 }, { "epoch": 13.31042944785276, "grad_norm": 0.24183209240436554, "learning_rate": 1.3543931394416624e-05, "loss": 0.6787945032119751, "step": 10848 }, { "epoch": 13.31165644171779, "grad_norm": 0.3014459013938904, "learning_rate": 1.3539470459936943e-05, "loss": 0.3722580671310425, "step": 10849 }, { "epoch": 13.312883435582823, "grad_norm": 0.23871439695358276, "learning_rate": 1.3535009987398522e-05, "loss": 0.6159586310386658, "step": 10850 }, { "epoch": 13.314110429447853, "grad_norm": 0.2721560597419739, "learning_rate": 1.3530549976981158e-05, "loss": 0.7487979531288147, "step": 10851 }, { "epoch": 13.315337423312883, "grad_norm": 0.2568843364715576, "learning_rate": 1.3526090428864603e-05, "loss": 0.8011754155158997, "step": 10852 }, { "epoch": 13.316564417177915, "grad_norm": 0.2564160227775574, "learning_rate": 1.3521631343228619e-05, "loss": 0.6497597098350525, "step": 10853 }, { "epoch": 13.317791411042945, "grad_norm": 0.2649667263031006, "learning_rate": 1.3517172720252935e-05, "loss": 0.7471722364425659, "step": 10854 }, { "epoch": 13.319018404907975, "grad_norm": 0.2806980609893799, "learning_rate": 1.3512714560117274e-05, "loss": 0.6650514006614685, "step": 10855 }, { "epoch": 13.320245398773006, "grad_norm": 0.25356751680374146, "learning_rate": 1.3508256863001331e-05, "loss": 0.6582108736038208, "step": 10856 }, { "epoch": 13.321472392638038, "grad_norm": 0.25943395495414734, "learning_rate": 1.3503799629084773e-05, "loss": 0.5787724852561951, "step": 10857 }, { "epoch": 13.322699386503068, "grad_norm": 0.3001404404640198, "learning_rate": 1.3499342858547259e-05, "loss": 0.6114209890365601, "step": 10858 }, { "epoch": 13.323926380368098, "grad_norm": 0.23450405895709991, "learning_rate": 1.3494886551568436e-05, "loss": 0.7139983177185059, "step": 10859 }, { "epoch": 13.325153374233128, "grad_norm": 0.28005892038345337, "learning_rate": 1.3490430708327922e-05, "loss": 0.6831492185592651, "step": 10860 }, { "epoch": 13.32638036809816, "grad_norm": 0.25745803117752075, "learning_rate": 1.3485975329005333e-05, "loss": 0.5795769691467285, "step": 10861 }, { "epoch": 13.32760736196319, "grad_norm": 0.24267521500587463, "learning_rate": 1.3481520413780224e-05, "loss": 0.6630650162696838, "step": 10862 }, { "epoch": 13.32883435582822, "grad_norm": 0.30307531356811523, "learning_rate": 1.3477065962832181e-05, "loss": 0.6256469488143921, "step": 10863 }, { "epoch": 13.330061349693251, "grad_norm": 0.24373427033424377, "learning_rate": 1.3472611976340743e-05, "loss": 0.5644912123680115, "step": 10864 }, { "epoch": 13.331288343558283, "grad_norm": 0.28128260374069214, "learning_rate": 1.3468158454485436e-05, "loss": 0.6522329449653625, "step": 10865 }, { "epoch": 13.332515337423313, "grad_norm": 0.27164268493652344, "learning_rate": 1.3463705397445784e-05, "loss": 0.7733452320098877, "step": 10866 }, { "epoch": 13.333742331288343, "grad_norm": 0.25929683446884155, "learning_rate": 1.3459252805401246e-05, "loss": 0.7406058311462402, "step": 10867 }, { "epoch": 13.334969325153374, "grad_norm": 0.2282206118106842, "learning_rate": 1.3454800678531321e-05, "loss": 0.6889047622680664, "step": 10868 }, { "epoch": 13.336196319018406, "grad_norm": 0.28916075825691223, "learning_rate": 1.3450349017015452e-05, "loss": 0.7816766500473022, "step": 10869 }, { "epoch": 13.337423312883436, "grad_norm": 0.279069185256958, "learning_rate": 1.3445897821033082e-05, "loss": 0.4813489615917206, "step": 10870 }, { "epoch": 13.338650306748466, "grad_norm": 0.24995309114456177, "learning_rate": 1.3441447090763609e-05, "loss": 0.7041193246841431, "step": 10871 }, { "epoch": 13.339877300613496, "grad_norm": 0.2604973614215851, "learning_rate": 1.3436996826386439e-05, "loss": 0.7241716384887695, "step": 10872 }, { "epoch": 13.341104294478528, "grad_norm": 0.3145758807659149, "learning_rate": 1.3432547028080949e-05, "loss": 0.588619589805603, "step": 10873 }, { "epoch": 13.342331288343559, "grad_norm": 0.2781760096549988, "learning_rate": 1.3428097696026495e-05, "loss": 0.6811387538909912, "step": 10874 }, { "epoch": 13.343558282208589, "grad_norm": 0.3121272325515747, "learning_rate": 1.342364883040243e-05, "loss": 0.6389305591583252, "step": 10875 }, { "epoch": 13.344785276073619, "grad_norm": 0.2612340748310089, "learning_rate": 1.3419200431388051e-05, "loss": 0.8033462762832642, "step": 10876 }, { "epoch": 13.346012269938651, "grad_norm": 0.26152071356773376, "learning_rate": 1.3414752499162675e-05, "loss": 0.6551558971405029, "step": 10877 }, { "epoch": 13.347239263803681, "grad_norm": 0.2652563452720642, "learning_rate": 1.3410305033905585e-05, "loss": 0.6894279718399048, "step": 10878 }, { "epoch": 13.348466257668711, "grad_norm": 0.26175522804260254, "learning_rate": 1.3405858035796043e-05, "loss": 0.7265492677688599, "step": 10879 }, { "epoch": 13.349693251533742, "grad_norm": 0.2239285409450531, "learning_rate": 1.3401411505013307e-05, "loss": 0.40756288170814514, "step": 10880 }, { "epoch": 13.350920245398774, "grad_norm": 0.23799337446689606, "learning_rate": 1.339696544173658e-05, "loss": 0.7977085709571838, "step": 10881 }, { "epoch": 13.352147239263804, "grad_norm": 0.2410680204629898, "learning_rate": 1.3392519846145085e-05, "loss": 0.5551577210426331, "step": 10882 }, { "epoch": 13.353374233128834, "grad_norm": 0.26910945773124695, "learning_rate": 1.3388074718418011e-05, "loss": 0.7733653783798218, "step": 10883 }, { "epoch": 13.354601226993864, "grad_norm": 0.2612980306148529, "learning_rate": 1.3383630058734525e-05, "loss": 0.7578392028808594, "step": 10884 }, { "epoch": 13.355828220858896, "grad_norm": 0.36219727993011475, "learning_rate": 1.3379185867273792e-05, "loss": 0.7564195990562439, "step": 10885 }, { "epoch": 13.357055214723927, "grad_norm": 0.34621477127075195, "learning_rate": 1.3374742144214922e-05, "loss": 0.518768310546875, "step": 10886 }, { "epoch": 13.358282208588957, "grad_norm": 0.23935694992542267, "learning_rate": 1.3370298889737041e-05, "loss": 0.715529203414917, "step": 10887 }, { "epoch": 13.359509202453987, "grad_norm": 0.2898820638656616, "learning_rate": 1.3365856104019242e-05, "loss": 0.6755545139312744, "step": 10888 }, { "epoch": 13.360736196319019, "grad_norm": 0.29272568225860596, "learning_rate": 1.3361413787240601e-05, "loss": 0.6322387456893921, "step": 10889 }, { "epoch": 13.36196319018405, "grad_norm": 0.2741755247116089, "learning_rate": 1.3356971939580177e-05, "loss": 0.5407459735870361, "step": 10890 }, { "epoch": 13.36319018404908, "grad_norm": 0.22557632625102997, "learning_rate": 1.3352530561217005e-05, "loss": 0.5267901420593262, "step": 10891 }, { "epoch": 13.36441717791411, "grad_norm": 0.3263661563396454, "learning_rate": 1.3348089652330109e-05, "loss": 0.6303321123123169, "step": 10892 }, { "epoch": 13.365644171779142, "grad_norm": 0.2653769850730896, "learning_rate": 1.3343649213098486e-05, "loss": 0.7313907146453857, "step": 10893 }, { "epoch": 13.366871165644172, "grad_norm": 0.31998181343078613, "learning_rate": 1.3339209243701129e-05, "loss": 0.382622092962265, "step": 10894 }, { "epoch": 13.368098159509202, "grad_norm": 0.27363264560699463, "learning_rate": 1.3334769744316977e-05, "loss": 0.6805912256240845, "step": 10895 }, { "epoch": 13.369325153374232, "grad_norm": 0.26470503211021423, "learning_rate": 1.333033071512499e-05, "loss": 0.6578623056411743, "step": 10896 }, { "epoch": 13.370552147239264, "grad_norm": 0.30063408613204956, "learning_rate": 1.3325892156304088e-05, "loss": 0.6419687271118164, "step": 10897 }, { "epoch": 13.371779141104295, "grad_norm": 0.26862770318984985, "learning_rate": 1.3321454068033173e-05, "loss": 0.6442375779151917, "step": 10898 }, { "epoch": 13.373006134969325, "grad_norm": 0.2550550401210785, "learning_rate": 1.3317016450491143e-05, "loss": 0.6141718626022339, "step": 10899 }, { "epoch": 13.374233128834355, "grad_norm": 0.2764810621738434, "learning_rate": 1.3312579303856866e-05, "loss": 0.5917028784751892, "step": 10900 }, { "epoch": 13.375460122699387, "grad_norm": 0.3170434534549713, "learning_rate": 1.3308142628309173e-05, "loss": 0.4397016167640686, "step": 10901 }, { "epoch": 13.376687116564417, "grad_norm": 0.3296964764595032, "learning_rate": 1.3303706424026905e-05, "loss": 0.4530794024467468, "step": 10902 }, { "epoch": 13.377914110429447, "grad_norm": 0.2746110260486603, "learning_rate": 1.3299270691188873e-05, "loss": 0.4862595200538635, "step": 10903 }, { "epoch": 13.379141104294478, "grad_norm": 0.2643703818321228, "learning_rate": 1.3294835429973867e-05, "loss": 0.7388930320739746, "step": 10904 }, { "epoch": 13.38036809815951, "grad_norm": 0.29006561636924744, "learning_rate": 1.3290400640560668e-05, "loss": 0.47417694330215454, "step": 10905 }, { "epoch": 13.38159509202454, "grad_norm": 0.2874477505683899, "learning_rate": 1.3285966323128019e-05, "loss": 0.7193608283996582, "step": 10906 }, { "epoch": 13.38282208588957, "grad_norm": 0.4062780737876892, "learning_rate": 1.3281532477854653e-05, "loss": 0.5566567182540894, "step": 10907 }, { "epoch": 13.3840490797546, "grad_norm": 0.22970719635486603, "learning_rate": 1.3277099104919294e-05, "loss": 0.530386745929718, "step": 10908 }, { "epoch": 13.385276073619632, "grad_norm": 0.2875027358531952, "learning_rate": 1.3272666204500633e-05, "loss": 0.396120548248291, "step": 10909 }, { "epoch": 13.386503067484663, "grad_norm": 0.23419488966464996, "learning_rate": 1.3268233776777358e-05, "loss": 0.4985022246837616, "step": 10910 }, { "epoch": 13.387730061349693, "grad_norm": 0.6557098627090454, "learning_rate": 1.3263801821928101e-05, "loss": 0.6593209505081177, "step": 10911 }, { "epoch": 13.388957055214725, "grad_norm": 0.25926473736763, "learning_rate": 1.3259370340131533e-05, "loss": 0.5447147488594055, "step": 10912 }, { "epoch": 13.390184049079755, "grad_norm": 0.32319310307502747, "learning_rate": 1.3254939331566257e-05, "loss": 0.5690006017684937, "step": 10913 }, { "epoch": 13.391411042944785, "grad_norm": 0.2510317862033844, "learning_rate": 1.325050879641089e-05, "loss": 0.557968020439148, "step": 10914 }, { "epoch": 13.392638036809815, "grad_norm": 0.25263965129852295, "learning_rate": 1.3246078734843994e-05, "loss": 0.5545059442520142, "step": 10915 }, { "epoch": 13.393865030674847, "grad_norm": 0.23322772979736328, "learning_rate": 1.3241649147044138e-05, "loss": 0.5833132863044739, "step": 10916 }, { "epoch": 13.395092024539878, "grad_norm": 0.26021504402160645, "learning_rate": 1.3237220033189874e-05, "loss": 0.588018000125885, "step": 10917 }, { "epoch": 13.396319018404908, "grad_norm": 0.31202757358551025, "learning_rate": 1.323279139345972e-05, "loss": 0.6774698495864868, "step": 10918 }, { "epoch": 13.397546012269938, "grad_norm": 0.3000054955482483, "learning_rate": 1.3228363228032192e-05, "loss": 0.5241755843162537, "step": 10919 }, { "epoch": 13.39877300613497, "grad_norm": 0.2976691722869873, "learning_rate": 1.3223935537085758e-05, "loss": 0.7176745533943176, "step": 10920 }, { "epoch": 13.4, "grad_norm": 0.26446962356567383, "learning_rate": 1.3219508320798899e-05, "loss": 0.8202463388442993, "step": 10921 }, { "epoch": 13.40122699386503, "grad_norm": 0.42379897832870483, "learning_rate": 1.3215081579350058e-05, "loss": 0.6077662706375122, "step": 10922 }, { "epoch": 13.40245398773006, "grad_norm": 0.28566429018974304, "learning_rate": 1.321065531291767e-05, "loss": 0.6109662055969238, "step": 10923 }, { "epoch": 13.403680981595093, "grad_norm": 0.2859090268611908, "learning_rate": 1.3206229521680147e-05, "loss": 0.670230507850647, "step": 10924 }, { "epoch": 13.404907975460123, "grad_norm": 0.26171183586120605, "learning_rate": 1.3201804205815871e-05, "loss": 0.6989439129829407, "step": 10925 }, { "epoch": 13.406134969325153, "grad_norm": 0.3196702003479004, "learning_rate": 1.3197379365503215e-05, "loss": 0.4546216130256653, "step": 10926 }, { "epoch": 13.407361963190183, "grad_norm": 0.22897230088710785, "learning_rate": 1.3192955000920534e-05, "loss": 0.6185171008110046, "step": 10927 }, { "epoch": 13.408588957055215, "grad_norm": 0.24156230688095093, "learning_rate": 1.3188531112246164e-05, "loss": 0.7319279909133911, "step": 10928 }, { "epoch": 13.409815950920246, "grad_norm": 0.23885320127010345, "learning_rate": 1.3184107699658427e-05, "loss": 0.6614774465560913, "step": 10929 }, { "epoch": 13.411042944785276, "grad_norm": 0.2608843445777893, "learning_rate": 1.3179684763335599e-05, "loss": 0.7410428524017334, "step": 10930 }, { "epoch": 13.412269938650306, "grad_norm": 0.2666740119457245, "learning_rate": 1.3175262303455962e-05, "loss": 0.3705669045448303, "step": 10931 }, { "epoch": 13.413496932515338, "grad_norm": 0.26232174038887024, "learning_rate": 1.317084032019778e-05, "loss": 0.6581336855888367, "step": 10932 }, { "epoch": 13.414723926380368, "grad_norm": 0.2839100658893585, "learning_rate": 1.3166418813739285e-05, "loss": 0.41364264488220215, "step": 10933 }, { "epoch": 13.415950920245399, "grad_norm": 0.2605152130126953, "learning_rate": 1.31619977842587e-05, "loss": 0.6819111108779907, "step": 10934 }, { "epoch": 13.417177914110429, "grad_norm": 0.2566855549812317, "learning_rate": 1.3157577231934217e-05, "loss": 0.5400247573852539, "step": 10935 }, { "epoch": 13.41840490797546, "grad_norm": 0.3041072189807892, "learning_rate": 1.3153157156944023e-05, "loss": 0.4632911682128906, "step": 10936 }, { "epoch": 13.419631901840491, "grad_norm": 0.31826984882354736, "learning_rate": 1.3148737559466276e-05, "loss": 0.5078564286231995, "step": 10937 }, { "epoch": 13.420858895705521, "grad_norm": 0.25207844376564026, "learning_rate": 1.3144318439679126e-05, "loss": 0.588128924369812, "step": 10938 }, { "epoch": 13.422085889570551, "grad_norm": 0.226662740111351, "learning_rate": 1.3139899797760676e-05, "loss": 0.5700332522392273, "step": 10939 }, { "epoch": 13.423312883435583, "grad_norm": 0.2763010859489441, "learning_rate": 1.313548163388904e-05, "loss": 0.7743819952011108, "step": 10940 }, { "epoch": 13.424539877300614, "grad_norm": 0.28063106536865234, "learning_rate": 1.3131063948242301e-05, "loss": 0.7375638484954834, "step": 10941 }, { "epoch": 13.425766871165644, "grad_norm": 0.29999837279319763, "learning_rate": 1.3126646740998522e-05, "loss": 0.5425145626068115, "step": 10942 }, { "epoch": 13.426993865030674, "grad_norm": 0.2295914590358734, "learning_rate": 1.3122230012335759e-05, "loss": 0.5110138654708862, "step": 10943 }, { "epoch": 13.428220858895706, "grad_norm": 0.2728678584098816, "learning_rate": 1.3117813762432019e-05, "loss": 0.6834018230438232, "step": 10944 }, { "epoch": 13.429447852760736, "grad_norm": 0.276711642742157, "learning_rate": 1.3113397991465315e-05, "loss": 0.5447561740875244, "step": 10945 }, { "epoch": 13.430674846625767, "grad_norm": 0.26118004322052, "learning_rate": 1.310898269961364e-05, "loss": 0.5664608478546143, "step": 10946 }, { "epoch": 13.431901840490797, "grad_norm": 0.2978792190551758, "learning_rate": 1.3104567887054952e-05, "loss": 0.47637808322906494, "step": 10947 }, { "epoch": 13.433128834355829, "grad_norm": 0.3195802867412567, "learning_rate": 1.310015355396722e-05, "loss": 0.6807428598403931, "step": 10948 }, { "epoch": 13.434355828220859, "grad_norm": 0.2523135840892792, "learning_rate": 1.309573970052835e-05, "loss": 0.5857415199279785, "step": 10949 }, { "epoch": 13.43558282208589, "grad_norm": 0.3029724657535553, "learning_rate": 1.309132632691626e-05, "loss": 0.5837358236312866, "step": 10950 }, { "epoch": 13.43680981595092, "grad_norm": 0.29395464062690735, "learning_rate": 1.308691343330884e-05, "loss": 0.5868818759918213, "step": 10951 }, { "epoch": 13.438036809815952, "grad_norm": 0.27673467993736267, "learning_rate": 1.3082501019883961e-05, "loss": 0.670608639717102, "step": 10952 }, { "epoch": 13.439263803680982, "grad_norm": 0.3049974739551544, "learning_rate": 1.3078089086819489e-05, "loss": 0.5472995042800903, "step": 10953 }, { "epoch": 13.440490797546012, "grad_norm": 0.26556506752967834, "learning_rate": 1.307367763429323e-05, "loss": 0.5255036354064941, "step": 10954 }, { "epoch": 13.441717791411042, "grad_norm": 0.26499485969543457, "learning_rate": 1.3069266662483015e-05, "loss": 0.39893031120300293, "step": 10955 }, { "epoch": 13.442944785276074, "grad_norm": 0.27577418088912964, "learning_rate": 1.3064856171566619e-05, "loss": 0.7331757545471191, "step": 10956 }, { "epoch": 13.444171779141104, "grad_norm": 0.2533177435398102, "learning_rate": 1.3060446161721855e-05, "loss": 0.6450481414794922, "step": 10957 }, { "epoch": 13.445398773006135, "grad_norm": 0.24556639790534973, "learning_rate": 1.305603663312644e-05, "loss": 0.8315466642379761, "step": 10958 }, { "epoch": 13.446625766871165, "grad_norm": 0.2073223888874054, "learning_rate": 1.3051627585958124e-05, "loss": 0.5902161002159119, "step": 10959 }, { "epoch": 13.447852760736197, "grad_norm": 0.23759250342845917, "learning_rate": 1.3047219020394623e-05, "loss": 0.5278948545455933, "step": 10960 }, { "epoch": 13.449079754601227, "grad_norm": 0.2526575028896332, "learning_rate": 1.3042810936613631e-05, "loss": 0.5572100877761841, "step": 10961 }, { "epoch": 13.450306748466257, "grad_norm": 0.2767653167247772, "learning_rate": 1.3038403334792837e-05, "loss": 0.7905404567718506, "step": 10962 }, { "epoch": 13.451533742331288, "grad_norm": 0.2852153182029724, "learning_rate": 1.3033996215109883e-05, "loss": 0.33581194281578064, "step": 10963 }, { "epoch": 13.45276073619632, "grad_norm": 0.2909957766532898, "learning_rate": 1.302958957774241e-05, "loss": 0.6549593806266785, "step": 10964 }, { "epoch": 13.45398773006135, "grad_norm": 0.29883405566215515, "learning_rate": 1.3025183422868043e-05, "loss": 0.49748164415359497, "step": 10965 }, { "epoch": 13.45521472392638, "grad_norm": 0.25981247425079346, "learning_rate": 1.3020777750664375e-05, "loss": 0.5293545126914978, "step": 10966 }, { "epoch": 13.45644171779141, "grad_norm": 0.26693612337112427, "learning_rate": 1.3016372561309003e-05, "loss": 0.5293380618095398, "step": 10967 }, { "epoch": 13.457668711656442, "grad_norm": 0.26428669691085815, "learning_rate": 1.3011967854979465e-05, "loss": 0.7242481708526611, "step": 10968 }, { "epoch": 13.458895705521472, "grad_norm": 0.27720755338668823, "learning_rate": 1.3007563631853308e-05, "loss": 0.4617174565792084, "step": 10969 }, { "epoch": 13.460122699386503, "grad_norm": 0.26106446981430054, "learning_rate": 1.3003159892108064e-05, "loss": 0.7419780492782593, "step": 10970 }, { "epoch": 13.461349693251535, "grad_norm": 0.2853218615055084, "learning_rate": 1.2998756635921225e-05, "loss": 0.7157446146011353, "step": 10971 }, { "epoch": 13.462576687116565, "grad_norm": 0.29256775975227356, "learning_rate": 1.2994353863470288e-05, "loss": 0.620529055595398, "step": 10972 }, { "epoch": 13.463803680981595, "grad_norm": 0.2852465510368347, "learning_rate": 1.2989951574932693e-05, "loss": 0.5910348892211914, "step": 10973 }, { "epoch": 13.465030674846625, "grad_norm": 0.2446495145559311, "learning_rate": 1.2985549770485899e-05, "loss": 0.58184415102005, "step": 10974 }, { "epoch": 13.466257668711656, "grad_norm": 0.3094530701637268, "learning_rate": 1.298114845030733e-05, "loss": 0.6565002202987671, "step": 10975 }, { "epoch": 13.467484662576688, "grad_norm": 0.2915531098842621, "learning_rate": 1.297674761457438e-05, "loss": 0.7005224823951721, "step": 10976 }, { "epoch": 13.468711656441718, "grad_norm": 0.23184266686439514, "learning_rate": 1.297234726346446e-05, "loss": 0.7172101140022278, "step": 10977 }, { "epoch": 13.469938650306748, "grad_norm": 0.25133398175239563, "learning_rate": 1.2967947397154894e-05, "loss": 0.6505151987075806, "step": 10978 }, { "epoch": 13.47116564417178, "grad_norm": 0.2669391632080078, "learning_rate": 1.2963548015823063e-05, "loss": 0.6382650136947632, "step": 10979 }, { "epoch": 13.47239263803681, "grad_norm": 0.2888436019420624, "learning_rate": 1.2959149119646282e-05, "loss": 0.501835823059082, "step": 10980 }, { "epoch": 13.47361963190184, "grad_norm": 0.2590997517108917, "learning_rate": 1.2954750708801871e-05, "loss": 0.7589142322540283, "step": 10981 }, { "epoch": 13.47484662576687, "grad_norm": 0.29505646228790283, "learning_rate": 1.295035278346709e-05, "loss": 0.6229851841926575, "step": 10982 }, { "epoch": 13.476073619631903, "grad_norm": 0.28540754318237305, "learning_rate": 1.2945955343819228e-05, "loss": 0.443769633769989, "step": 10983 }, { "epoch": 13.477300613496933, "grad_norm": 0.2751709222793579, "learning_rate": 1.2941558390035521e-05, "loss": 0.4644917845726013, "step": 10984 }, { "epoch": 13.478527607361963, "grad_norm": 0.27696502208709717, "learning_rate": 1.2937161922293206e-05, "loss": 0.5393819212913513, "step": 10985 }, { "epoch": 13.479754601226993, "grad_norm": 0.26098865270614624, "learning_rate": 1.2932765940769498e-05, "loss": 0.7888553142547607, "step": 10986 }, { "epoch": 13.480981595092025, "grad_norm": 0.3104485869407654, "learning_rate": 1.2928370445641572e-05, "loss": 0.313203901052475, "step": 10987 }, { "epoch": 13.482208588957056, "grad_norm": 0.2904314398765564, "learning_rate": 1.2923975437086599e-05, "loss": 0.5947884321212769, "step": 10988 }, { "epoch": 13.483435582822086, "grad_norm": 0.290155291557312, "learning_rate": 1.2919580915281737e-05, "loss": 0.6852390766143799, "step": 10989 }, { "epoch": 13.484662576687116, "grad_norm": 0.26062697172164917, "learning_rate": 1.2915186880404115e-05, "loss": 0.5127434730529785, "step": 10990 }, { "epoch": 13.485889570552148, "grad_norm": 0.25512099266052246, "learning_rate": 1.2910793332630849e-05, "loss": 0.7687849998474121, "step": 10991 }, { "epoch": 13.487116564417178, "grad_norm": 0.2725259065628052, "learning_rate": 1.2906400272139018e-05, "loss": 0.5759000778198242, "step": 10992 }, { "epoch": 13.488343558282208, "grad_norm": 0.274128258228302, "learning_rate": 1.29020076991057e-05, "loss": 0.5135711431503296, "step": 10993 }, { "epoch": 13.489570552147239, "grad_norm": 0.27656084299087524, "learning_rate": 1.2897615613707948e-05, "loss": 0.6230019330978394, "step": 10994 }, { "epoch": 13.49079754601227, "grad_norm": 0.2902977764606476, "learning_rate": 1.289322401612279e-05, "loss": 0.7173458337783813, "step": 10995 }, { "epoch": 13.4920245398773, "grad_norm": 0.27465391159057617, "learning_rate": 1.2888832906527254e-05, "loss": 0.5137037634849548, "step": 10996 }, { "epoch": 13.493251533742331, "grad_norm": 0.23851223289966583, "learning_rate": 1.2884442285098308e-05, "loss": 0.4488685727119446, "step": 10997 }, { "epoch": 13.494478527607361, "grad_norm": 0.24151992797851562, "learning_rate": 1.2880052152012945e-05, "loss": 0.6433931589126587, "step": 10998 }, { "epoch": 13.495705521472393, "grad_norm": 0.3353462815284729, "learning_rate": 1.2875662507448108e-05, "loss": 0.535852313041687, "step": 10999 }, { "epoch": 13.496932515337424, "grad_norm": 0.2612623870372772, "learning_rate": 1.2871273351580736e-05, "loss": 0.5801329016685486, "step": 11000 }, { "epoch": 13.498159509202454, "grad_norm": 0.2692534625530243, "learning_rate": 1.2866884684587745e-05, "loss": 0.40401098132133484, "step": 11001 }, { "epoch": 13.499386503067484, "grad_norm": 0.2577580511569977, "learning_rate": 1.2862496506646027e-05, "loss": 0.545688271522522, "step": 11002 }, { "epoch": 13.500613496932516, "grad_norm": 0.30131465196609497, "learning_rate": 1.2858108817932458e-05, "loss": 0.6354950666427612, "step": 11003 }, { "epoch": 13.501840490797546, "grad_norm": 0.263459712266922, "learning_rate": 1.2853721618623894e-05, "loss": 0.6974968910217285, "step": 11004 }, { "epoch": 13.503067484662576, "grad_norm": 0.28987130522727966, "learning_rate": 1.2849334908897165e-05, "loss": 0.5563110709190369, "step": 11005 }, { "epoch": 13.504294478527607, "grad_norm": 0.25476425886154175, "learning_rate": 1.2844948688929104e-05, "loss": 0.6361656785011292, "step": 11006 }, { "epoch": 13.505521472392639, "grad_norm": 0.4618934988975525, "learning_rate": 1.2840562958896485e-05, "loss": 0.507271945476532, "step": 11007 }, { "epoch": 13.506748466257669, "grad_norm": 0.38084378838539124, "learning_rate": 1.2836177718976088e-05, "loss": 0.5469883680343628, "step": 11008 }, { "epoch": 13.5079754601227, "grad_norm": 0.2706720530986786, "learning_rate": 1.283179296934468e-05, "loss": 0.5302850008010864, "step": 11009 }, { "epoch": 13.50920245398773, "grad_norm": 0.25979939103126526, "learning_rate": 1.2827408710178993e-05, "loss": 0.6946227550506592, "step": 11010 }, { "epoch": 13.510429447852761, "grad_norm": 0.2651202976703644, "learning_rate": 1.2823024941655751e-05, "loss": 0.7021156549453735, "step": 11011 }, { "epoch": 13.511656441717792, "grad_norm": 0.28115609288215637, "learning_rate": 1.2818641663951636e-05, "loss": 0.4201884865760803, "step": 11012 }, { "epoch": 13.512883435582822, "grad_norm": 0.32909587025642395, "learning_rate": 1.281425887724333e-05, "loss": 0.5301053524017334, "step": 11013 }, { "epoch": 13.514110429447852, "grad_norm": 0.24720288813114166, "learning_rate": 1.2809876581707497e-05, "loss": 0.45689857006073, "step": 11014 }, { "epoch": 13.515337423312884, "grad_norm": 0.30014753341674805, "learning_rate": 1.2805494777520768e-05, "loss": 0.5896172523498535, "step": 11015 }, { "epoch": 13.516564417177914, "grad_norm": 0.25324276089668274, "learning_rate": 1.2801113464859776e-05, "loss": 0.5441204905509949, "step": 11016 }, { "epoch": 13.517791411042944, "grad_norm": 0.28209981322288513, "learning_rate": 1.2796732643901099e-05, "loss": 0.6293219923973083, "step": 11017 }, { "epoch": 13.519018404907975, "grad_norm": 0.24950994551181793, "learning_rate": 1.2792352314821326e-05, "loss": 0.5874244570732117, "step": 11018 }, { "epoch": 13.520245398773007, "grad_norm": 0.25586646795272827, "learning_rate": 1.2787972477797012e-05, "loss": 0.4641530513763428, "step": 11019 }, { "epoch": 13.521472392638037, "grad_norm": 0.2594330310821533, "learning_rate": 1.2783593133004698e-05, "loss": 0.6508874297142029, "step": 11020 }, { "epoch": 13.522699386503067, "grad_norm": 0.2747376561164856, "learning_rate": 1.277921428062091e-05, "loss": 0.6655300855636597, "step": 11021 }, { "epoch": 13.523926380368097, "grad_norm": 0.325344055891037, "learning_rate": 1.2774835920822133e-05, "loss": 0.6036969423294067, "step": 11022 }, { "epoch": 13.52515337423313, "grad_norm": 0.25841695070266724, "learning_rate": 1.2770458053784845e-05, "loss": 0.7536591291427612, "step": 11023 }, { "epoch": 13.52638036809816, "grad_norm": 0.24785476922988892, "learning_rate": 1.2766080679685515e-05, "loss": 0.5048996210098267, "step": 11024 }, { "epoch": 13.52760736196319, "grad_norm": 0.2507690191268921, "learning_rate": 1.2761703798700597e-05, "loss": 0.7929842472076416, "step": 11025 }, { "epoch": 13.52883435582822, "grad_norm": 0.2738397717475891, "learning_rate": 1.2757327411006484e-05, "loss": 0.85814368724823, "step": 11026 }, { "epoch": 13.530061349693252, "grad_norm": 0.25544869899749756, "learning_rate": 1.2752951516779582e-05, "loss": 0.694108247756958, "step": 11027 }, { "epoch": 13.531288343558282, "grad_norm": 0.31745320558547974, "learning_rate": 1.2748576116196279e-05, "loss": 0.48222991824150085, "step": 11028 }, { "epoch": 13.532515337423312, "grad_norm": 0.33946460485458374, "learning_rate": 1.2744201209432927e-05, "loss": 0.6182911396026611, "step": 11029 }, { "epoch": 13.533742331288344, "grad_norm": 0.3259088397026062, "learning_rate": 1.2739826796665882e-05, "loss": 0.5311397910118103, "step": 11030 }, { "epoch": 13.534969325153375, "grad_norm": 0.2304731160402298, "learning_rate": 1.273545287807144e-05, "loss": 0.7191338539123535, "step": 11031 }, { "epoch": 13.536196319018405, "grad_norm": 0.23841683566570282, "learning_rate": 1.2731079453825916e-05, "loss": 0.4739111661911011, "step": 11032 }, { "epoch": 13.537423312883435, "grad_norm": 0.23098860681056976, "learning_rate": 1.2726706524105586e-05, "loss": 0.34212636947631836, "step": 11033 }, { "epoch": 13.538650306748465, "grad_norm": 0.2779276371002197, "learning_rate": 1.272233408908671e-05, "loss": 0.6126660108566284, "step": 11034 }, { "epoch": 13.539877300613497, "grad_norm": 0.25425979495048523, "learning_rate": 1.271796214894554e-05, "loss": 0.7342022657394409, "step": 11035 }, { "epoch": 13.541104294478528, "grad_norm": 0.23995265364646912, "learning_rate": 1.2713590703858274e-05, "loss": 0.5814591646194458, "step": 11036 }, { "epoch": 13.542331288343558, "grad_norm": 0.2614571452140808, "learning_rate": 1.2709219754001129e-05, "loss": 0.5896190404891968, "step": 11037 }, { "epoch": 13.54355828220859, "grad_norm": 0.24340054392814636, "learning_rate": 1.2704849299550281e-05, "loss": 0.5574500560760498, "step": 11038 }, { "epoch": 13.54478527607362, "grad_norm": 0.3027251362800598, "learning_rate": 1.270047934068189e-05, "loss": 0.441572368144989, "step": 11039 }, { "epoch": 13.54601226993865, "grad_norm": 0.34119027853012085, "learning_rate": 1.2696109877572105e-05, "loss": 0.6047353744506836, "step": 11040 }, { "epoch": 13.54723926380368, "grad_norm": 0.2912108898162842, "learning_rate": 1.2691740910397033e-05, "loss": 0.613015353679657, "step": 11041 }, { "epoch": 13.548466257668712, "grad_norm": 0.23983672261238098, "learning_rate": 1.268737243933278e-05, "loss": 0.5890169143676758, "step": 11042 }, { "epoch": 13.549693251533743, "grad_norm": 0.23552042245864868, "learning_rate": 1.2683004464555426e-05, "loss": 0.49415522813796997, "step": 11043 }, { "epoch": 13.550920245398773, "grad_norm": 0.265011727809906, "learning_rate": 1.2678636986241033e-05, "loss": 0.6367890238761902, "step": 11044 }, { "epoch": 13.552147239263803, "grad_norm": 0.307054728269577, "learning_rate": 1.2674270004565641e-05, "loss": 0.7161886096000671, "step": 11045 }, { "epoch": 13.553374233128835, "grad_norm": 0.26381242275238037, "learning_rate": 1.266990351970527e-05, "loss": 0.42968374490737915, "step": 11046 }, { "epoch": 13.554601226993865, "grad_norm": 0.3467765152454376, "learning_rate": 1.2665537531835925e-05, "loss": 0.2367836982011795, "step": 11047 }, { "epoch": 13.555828220858896, "grad_norm": 0.25286781787872314, "learning_rate": 1.2661172041133578e-05, "loss": 0.6274235844612122, "step": 11048 }, { "epoch": 13.557055214723926, "grad_norm": 0.25047582387924194, "learning_rate": 1.2656807047774211e-05, "loss": 0.5529364347457886, "step": 11049 }, { "epoch": 13.558282208588958, "grad_norm": 0.25164565443992615, "learning_rate": 1.2652442551933732e-05, "loss": 0.6177557706832886, "step": 11050 }, { "epoch": 13.559509202453988, "grad_norm": 0.24434463679790497, "learning_rate": 1.264807855378808e-05, "loss": 0.8370779156684875, "step": 11051 }, { "epoch": 13.560736196319018, "grad_norm": 0.26419854164123535, "learning_rate": 1.2643715053513155e-05, "loss": 0.614388108253479, "step": 11052 }, { "epoch": 13.561963190184048, "grad_norm": 0.2719268500804901, "learning_rate": 1.2639352051284831e-05, "loss": 0.6241833567619324, "step": 11053 }, { "epoch": 13.56319018404908, "grad_norm": 0.2095632404088974, "learning_rate": 1.2634989547278985e-05, "loss": 0.5352842211723328, "step": 11054 }, { "epoch": 13.56441717791411, "grad_norm": 0.31007176637649536, "learning_rate": 1.263062754167143e-05, "loss": 0.7431789636611938, "step": 11055 }, { "epoch": 13.565644171779141, "grad_norm": 0.24135185778141022, "learning_rate": 1.2626266034638006e-05, "loss": 0.5141239166259766, "step": 11056 }, { "epoch": 13.566871165644171, "grad_norm": 0.27700045704841614, "learning_rate": 1.2621905026354502e-05, "loss": 0.6094964742660522, "step": 11057 }, { "epoch": 13.568098159509203, "grad_norm": 0.25949567556381226, "learning_rate": 1.261754451699671e-05, "loss": 0.8210669755935669, "step": 11058 }, { "epoch": 13.569325153374233, "grad_norm": 0.23957203328609467, "learning_rate": 1.2613184506740386e-05, "loss": 0.662135124206543, "step": 11059 }, { "epoch": 13.570552147239264, "grad_norm": 0.27046361565589905, "learning_rate": 1.260882499576126e-05, "loss": 0.31179773807525635, "step": 11060 }, { "epoch": 13.571779141104294, "grad_norm": 0.2978857457637787, "learning_rate": 1.2604465984235053e-05, "loss": 0.6402220726013184, "step": 11061 }, { "epoch": 13.573006134969326, "grad_norm": 0.2702540159225464, "learning_rate": 1.2600107472337475e-05, "loss": 0.7478369474411011, "step": 11062 }, { "epoch": 13.574233128834356, "grad_norm": 0.2533958852291107, "learning_rate": 1.2595749460244194e-05, "loss": 0.5188449621200562, "step": 11063 }, { "epoch": 13.575460122699386, "grad_norm": 0.2470468431711197, "learning_rate": 1.2591391948130888e-05, "loss": 0.5710698366165161, "step": 11064 }, { "epoch": 13.576687116564417, "grad_norm": 0.2426205724477768, "learning_rate": 1.258703493617317e-05, "loss": 0.6437839269638062, "step": 11065 }, { "epoch": 13.577914110429449, "grad_norm": 0.333308607339859, "learning_rate": 1.2582678424546671e-05, "loss": 0.607060432434082, "step": 11066 }, { "epoch": 13.579141104294479, "grad_norm": 0.23437219858169556, "learning_rate": 1.257832241342698e-05, "loss": 0.7181971669197083, "step": 11067 }, { "epoch": 13.580368098159509, "grad_norm": 0.28574511408805847, "learning_rate": 1.2573966902989703e-05, "loss": 0.6111869812011719, "step": 11068 }, { "epoch": 13.58159509202454, "grad_norm": 0.31495919823646545, "learning_rate": 1.2569611893410374e-05, "loss": 0.4554882049560547, "step": 11069 }, { "epoch": 13.582822085889571, "grad_norm": 0.3537248969078064, "learning_rate": 1.2565257384864535e-05, "loss": 0.4624307155609131, "step": 11070 }, { "epoch": 13.584049079754601, "grad_norm": 0.3405185639858246, "learning_rate": 1.2560903377527706e-05, "loss": 0.34674325585365295, "step": 11071 }, { "epoch": 13.585276073619632, "grad_norm": 0.2744825482368469, "learning_rate": 1.2556549871575383e-05, "loss": 0.6099033355712891, "step": 11072 }, { "epoch": 13.586503067484662, "grad_norm": 0.24856005609035492, "learning_rate": 1.2552196867183055e-05, "loss": 0.671565055847168, "step": 11073 }, { "epoch": 13.587730061349694, "grad_norm": 0.2473098337650299, "learning_rate": 1.2547844364526159e-05, "loss": 0.6944507360458374, "step": 11074 }, { "epoch": 13.588957055214724, "grad_norm": 0.27033689618110657, "learning_rate": 1.2543492363780143e-05, "loss": 0.5197412967681885, "step": 11075 }, { "epoch": 13.590184049079754, "grad_norm": 0.2266610711812973, "learning_rate": 1.2539140865120424e-05, "loss": 0.6067227721214294, "step": 11076 }, { "epoch": 13.591411042944785, "grad_norm": 0.269048810005188, "learning_rate": 1.2534789868722397e-05, "loss": 0.6438194513320923, "step": 11077 }, { "epoch": 13.592638036809817, "grad_norm": 0.3724275827407837, "learning_rate": 1.2530439374761443e-05, "loss": 0.4769561290740967, "step": 11078 }, { "epoch": 13.593865030674847, "grad_norm": 0.2526412308216095, "learning_rate": 1.252608938341291e-05, "loss": 0.8031161427497864, "step": 11079 }, { "epoch": 13.595092024539877, "grad_norm": 0.30588167905807495, "learning_rate": 1.2521739894852135e-05, "loss": 0.6608915328979492, "step": 11080 }, { "epoch": 13.596319018404907, "grad_norm": 0.24376854300498962, "learning_rate": 1.2517390909254433e-05, "loss": 0.5606640577316284, "step": 11081 }, { "epoch": 13.59754601226994, "grad_norm": 0.2975313663482666, "learning_rate": 1.2513042426795108e-05, "loss": 0.5503098964691162, "step": 11082 }, { "epoch": 13.59877300613497, "grad_norm": 0.29516756534576416, "learning_rate": 1.2508694447649433e-05, "loss": 0.6403674483299255, "step": 11083 }, { "epoch": 13.6, "grad_norm": 0.2904287278652191, "learning_rate": 1.2504346971992652e-05, "loss": 0.6159835457801819, "step": 11084 }, { "epoch": 13.60122699386503, "grad_norm": 0.2654244899749756, "learning_rate": 1.2500000000000006e-05, "loss": 0.7706277370452881, "step": 11085 }, { "epoch": 13.602453987730062, "grad_norm": 0.2928132712841034, "learning_rate": 1.2495653531846707e-05, "loss": 0.8382389545440674, "step": 11086 }, { "epoch": 13.603680981595092, "grad_norm": 0.3292643129825592, "learning_rate": 1.2491307567707952e-05, "loss": 0.407883882522583, "step": 11087 }, { "epoch": 13.604907975460122, "grad_norm": 0.24167917668819427, "learning_rate": 1.248696210775892e-05, "loss": 0.7920610904693604, "step": 11088 }, { "epoch": 13.606134969325154, "grad_norm": 0.29248249530792236, "learning_rate": 1.2482617152174742e-05, "loss": 0.7361388802528381, "step": 11089 }, { "epoch": 13.607361963190185, "grad_norm": 0.3225012421607971, "learning_rate": 1.2478272701130572e-05, "loss": 0.5846821069717407, "step": 11090 }, { "epoch": 13.608588957055215, "grad_norm": 0.31229421496391296, "learning_rate": 1.247392875480152e-05, "loss": 0.53005051612854, "step": 11091 }, { "epoch": 13.609815950920245, "grad_norm": 0.2529725134372711, "learning_rate": 1.246958531336268e-05, "loss": 0.922063946723938, "step": 11092 }, { "epoch": 13.611042944785275, "grad_norm": 0.26228001713752747, "learning_rate": 1.2465242376989112e-05, "loss": 0.7520585656166077, "step": 11093 }, { "epoch": 13.612269938650307, "grad_norm": 0.27830639481544495, "learning_rate": 1.2460899945855873e-05, "loss": 0.6563007831573486, "step": 11094 }, { "epoch": 13.613496932515337, "grad_norm": 0.2527615427970886, "learning_rate": 1.2456558020137994e-05, "loss": 0.7527209520339966, "step": 11095 }, { "epoch": 13.614723926380368, "grad_norm": 0.26702338457107544, "learning_rate": 1.2452216600010482e-05, "loss": 0.7386205792427063, "step": 11096 }, { "epoch": 13.6159509202454, "grad_norm": 0.2827472686767578, "learning_rate": 1.2447875685648347e-05, "loss": 0.5572912693023682, "step": 11097 }, { "epoch": 13.61717791411043, "grad_norm": 0.2872954308986664, "learning_rate": 1.244353527722653e-05, "loss": 0.632194995880127, "step": 11098 }, { "epoch": 13.61840490797546, "grad_norm": 0.2726258933544159, "learning_rate": 1.2439195374919995e-05, "loss": 0.8793478608131409, "step": 11099 }, { "epoch": 13.61963190184049, "grad_norm": 0.2300645112991333, "learning_rate": 1.243485597890367e-05, "loss": 0.6222211122512817, "step": 11100 }, { "epoch": 13.62085889570552, "grad_norm": 0.23295986652374268, "learning_rate": 1.2430517089352464e-05, "loss": 0.6088871955871582, "step": 11101 }, { "epoch": 13.622085889570553, "grad_norm": 0.25238707661628723, "learning_rate": 1.2426178706441266e-05, "loss": 0.6185896396636963, "step": 11102 }, { "epoch": 13.623312883435583, "grad_norm": 0.28104931116104126, "learning_rate": 1.242184083034495e-05, "loss": 0.6118002533912659, "step": 11103 }, { "epoch": 13.624539877300613, "grad_norm": 0.2363937646150589, "learning_rate": 1.2417503461238346e-05, "loss": 0.4908238649368286, "step": 11104 }, { "epoch": 13.625766871165645, "grad_norm": 0.2749103307723999, "learning_rate": 1.2413166599296293e-05, "loss": 0.604740560054779, "step": 11105 }, { "epoch": 13.626993865030675, "grad_norm": 0.2756788730621338, "learning_rate": 1.2408830244693598e-05, "loss": 0.8223901987075806, "step": 11106 }, { "epoch": 13.628220858895705, "grad_norm": 0.23424281179904938, "learning_rate": 1.2404494397605041e-05, "loss": 0.6082140207290649, "step": 11107 }, { "epoch": 13.629447852760736, "grad_norm": 0.21738828718662262, "learning_rate": 1.2400159058205402e-05, "loss": 0.4572167992591858, "step": 11108 }, { "epoch": 13.630674846625768, "grad_norm": 0.23140408098697662, "learning_rate": 1.2395824226669406e-05, "loss": 0.669317364692688, "step": 11109 }, { "epoch": 13.631901840490798, "grad_norm": 0.2560410797595978, "learning_rate": 1.2391489903171785e-05, "loss": 0.6054297089576721, "step": 11110 }, { "epoch": 13.633128834355828, "grad_norm": 0.3271888196468353, "learning_rate": 1.238715608788724e-05, "loss": 0.6415320038795471, "step": 11111 }, { "epoch": 13.634355828220858, "grad_norm": 0.2723580300807953, "learning_rate": 1.2382822780990474e-05, "loss": 0.6438298225402832, "step": 11112 }, { "epoch": 13.63558282208589, "grad_norm": 0.2849313020706177, "learning_rate": 1.237848998265613e-05, "loss": 0.734352707862854, "step": 11113 }, { "epoch": 13.63680981595092, "grad_norm": 0.230479896068573, "learning_rate": 1.2374157693058858e-05, "loss": 0.33718809485435486, "step": 11114 }, { "epoch": 13.63803680981595, "grad_norm": 0.2947096824645996, "learning_rate": 1.2369825912373279e-05, "loss": 0.7770975828170776, "step": 11115 }, { "epoch": 13.639263803680981, "grad_norm": 0.29608020186424255, "learning_rate": 1.2365494640773991e-05, "loss": 0.6143537163734436, "step": 11116 }, { "epoch": 13.640490797546013, "grad_norm": 0.2571670711040497, "learning_rate": 1.2361163878435594e-05, "loss": 0.7880164980888367, "step": 11117 }, { "epoch": 13.641717791411043, "grad_norm": 0.2627091109752655, "learning_rate": 1.2356833625532624e-05, "loss": 0.6810132265090942, "step": 11118 }, { "epoch": 13.642944785276073, "grad_norm": 0.27831342816352844, "learning_rate": 1.2352503882239627e-05, "loss": 0.680800199508667, "step": 11119 }, { "epoch": 13.644171779141104, "grad_norm": 0.2643286883831024, "learning_rate": 1.2348174648731132e-05, "loss": 0.5323182344436646, "step": 11120 }, { "epoch": 13.645398773006136, "grad_norm": 0.29050445556640625, "learning_rate": 1.234384592518163e-05, "loss": 0.4231616258621216, "step": 11121 }, { "epoch": 13.646625766871166, "grad_norm": 0.38003870844841003, "learning_rate": 1.233951771176561e-05, "loss": 0.4206690192222595, "step": 11122 }, { "epoch": 13.647852760736196, "grad_norm": 0.29478374123573303, "learning_rate": 1.2335190008657516e-05, "loss": 0.6296816468238831, "step": 11123 }, { "epoch": 13.649079754601226, "grad_norm": 0.23512259125709534, "learning_rate": 1.233086281603179e-05, "loss": 0.6902910470962524, "step": 11124 }, { "epoch": 13.650306748466258, "grad_norm": 0.29604387283325195, "learning_rate": 1.2326536134062849e-05, "loss": 0.48085787892341614, "step": 11125 }, { "epoch": 13.651533742331289, "grad_norm": 0.286883682012558, "learning_rate": 1.2322209962925093e-05, "loss": 0.6375619769096375, "step": 11126 }, { "epoch": 13.652760736196319, "grad_norm": 0.30507636070251465, "learning_rate": 1.2317884302792904e-05, "loss": 0.4830619692802429, "step": 11127 }, { "epoch": 13.653987730061349, "grad_norm": 0.3169995844364166, "learning_rate": 1.231355915384062e-05, "loss": 0.7127184867858887, "step": 11128 }, { "epoch": 13.655214723926381, "grad_norm": 0.24650904536247253, "learning_rate": 1.2309234516242582e-05, "loss": 0.6133365631103516, "step": 11129 }, { "epoch": 13.656441717791411, "grad_norm": 0.4804665744304657, "learning_rate": 1.2304910390173105e-05, "loss": 0.6882556676864624, "step": 11130 }, { "epoch": 13.657668711656441, "grad_norm": 0.2602899670600891, "learning_rate": 1.2300586775806483e-05, "loss": 0.6062902212142944, "step": 11131 }, { "epoch": 13.658895705521472, "grad_norm": 0.2563963830471039, "learning_rate": 1.2296263673317e-05, "loss": 0.5382642149925232, "step": 11132 }, { "epoch": 13.660122699386504, "grad_norm": 0.26471468806266785, "learning_rate": 1.2291941082878883e-05, "loss": 0.5635679364204407, "step": 11133 }, { "epoch": 13.661349693251534, "grad_norm": 0.283500999212265, "learning_rate": 1.228761900466637e-05, "loss": 0.7701070308685303, "step": 11134 }, { "epoch": 13.662576687116564, "grad_norm": 0.2591943144798279, "learning_rate": 1.2283297438853686e-05, "loss": 0.7815943360328674, "step": 11135 }, { "epoch": 13.663803680981594, "grad_norm": 0.30285346508026123, "learning_rate": 1.2278976385615024e-05, "loss": 0.7083743810653687, "step": 11136 }, { "epoch": 13.665030674846626, "grad_norm": 0.24271327257156372, "learning_rate": 1.2274655845124531e-05, "loss": 0.8639657497406006, "step": 11137 }, { "epoch": 13.666257668711657, "grad_norm": 0.2644650638103485, "learning_rate": 1.2270335817556366e-05, "loss": 0.44531333446502686, "step": 11138 }, { "epoch": 13.667484662576687, "grad_norm": 0.29974302649497986, "learning_rate": 1.226601630308466e-05, "loss": 0.5453081727027893, "step": 11139 }, { "epoch": 13.668711656441717, "grad_norm": 0.2250595986843109, "learning_rate": 1.2261697301883518e-05, "loss": 0.5459855198860168, "step": 11140 }, { "epoch": 13.669938650306749, "grad_norm": 0.2631866931915283, "learning_rate": 1.2257378814127037e-05, "loss": 0.5450947284698486, "step": 11141 }, { "epoch": 13.67116564417178, "grad_norm": 0.2594379484653473, "learning_rate": 1.2253060839989264e-05, "loss": 0.7956702709197998, "step": 11142 }, { "epoch": 13.67239263803681, "grad_norm": 0.25082120299339294, "learning_rate": 1.224874337964425e-05, "loss": 0.6037694215774536, "step": 11143 }, { "epoch": 13.67361963190184, "grad_norm": 0.31731367111206055, "learning_rate": 1.2244426433266026e-05, "loss": 0.7842788696289062, "step": 11144 }, { "epoch": 13.674846625766872, "grad_norm": 0.2798030376434326, "learning_rate": 1.2240110001028593e-05, "loss": 0.43002331256866455, "step": 11145 }, { "epoch": 13.676073619631902, "grad_norm": 0.28782376646995544, "learning_rate": 1.2235794083105939e-05, "loss": 0.6001834273338318, "step": 11146 }, { "epoch": 13.677300613496932, "grad_norm": 0.31727004051208496, "learning_rate": 1.2231478679672015e-05, "loss": 0.6649288535118103, "step": 11147 }, { "epoch": 13.678527607361962, "grad_norm": 0.20287692546844482, "learning_rate": 1.2227163790900768e-05, "loss": 0.48008960485458374, "step": 11148 }, { "epoch": 13.679754601226994, "grad_norm": 0.34301140904426575, "learning_rate": 1.2222849416966117e-05, "loss": 0.596513569355011, "step": 11149 }, { "epoch": 13.680981595092025, "grad_norm": 0.32089555263519287, "learning_rate": 1.2218535558041966e-05, "loss": 0.5621054768562317, "step": 11150 }, { "epoch": 13.682208588957055, "grad_norm": 0.28945571184158325, "learning_rate": 1.2214222214302205e-05, "loss": 0.6773971319198608, "step": 11151 }, { "epoch": 13.683435582822085, "grad_norm": 0.314881294965744, "learning_rate": 1.2209909385920665e-05, "loss": 0.6022857427597046, "step": 11152 }, { "epoch": 13.684662576687117, "grad_norm": 0.2622579038143158, "learning_rate": 1.2205597073071204e-05, "loss": 0.6128886342048645, "step": 11153 }, { "epoch": 13.685889570552147, "grad_norm": 0.2321210503578186, "learning_rate": 1.2201285275927632e-05, "loss": 0.6109766960144043, "step": 11154 }, { "epoch": 13.687116564417177, "grad_norm": 0.2934842109680176, "learning_rate": 1.2196973994663746e-05, "loss": 0.5269434452056885, "step": 11155 }, { "epoch": 13.68834355828221, "grad_norm": 0.2772541344165802, "learning_rate": 1.2192663229453327e-05, "loss": 0.6137343049049377, "step": 11156 }, { "epoch": 13.68957055214724, "grad_norm": 0.255206435918808, "learning_rate": 1.2188352980470122e-05, "loss": 0.5845413208007812, "step": 11157 }, { "epoch": 13.69079754601227, "grad_norm": 0.33610931038856506, "learning_rate": 1.218404324788787e-05, "loss": 0.5528470873832703, "step": 11158 }, { "epoch": 13.6920245398773, "grad_norm": 0.26607072353363037, "learning_rate": 1.2179734031880283e-05, "loss": 0.5666837692260742, "step": 11159 }, { "epoch": 13.69325153374233, "grad_norm": 0.28955820202827454, "learning_rate": 1.217542533262106e-05, "loss": 0.568739116191864, "step": 11160 }, { "epoch": 13.694478527607362, "grad_norm": 0.3435809016227722, "learning_rate": 1.2171117150283856e-05, "loss": 0.561711847782135, "step": 11161 }, { "epoch": 13.695705521472393, "grad_norm": 0.2575376033782959, "learning_rate": 1.2166809485042333e-05, "loss": 0.6767789125442505, "step": 11162 }, { "epoch": 13.696932515337423, "grad_norm": 0.3355453908443451, "learning_rate": 1.2162502337070117e-05, "loss": 0.6211027503013611, "step": 11163 }, { "epoch": 13.698159509202455, "grad_norm": 0.23862221837043762, "learning_rate": 1.2158195706540818e-05, "loss": 0.5370074510574341, "step": 11164 }, { "epoch": 13.699386503067485, "grad_norm": 0.26279333233833313, "learning_rate": 1.2153889593628032e-05, "loss": 0.6405301094055176, "step": 11165 }, { "epoch": 13.700613496932515, "grad_norm": 0.3462349772453308, "learning_rate": 1.214958399850531e-05, "loss": 0.5169960856437683, "step": 11166 }, { "epoch": 13.701840490797546, "grad_norm": 0.2786642014980316, "learning_rate": 1.2145278921346206e-05, "loss": 0.706124484539032, "step": 11167 }, { "epoch": 13.703067484662578, "grad_norm": 0.26055702567100525, "learning_rate": 1.2140974362324247e-05, "loss": 0.691865086555481, "step": 11168 }, { "epoch": 13.704294478527608, "grad_norm": 0.2487066090106964, "learning_rate": 1.2136670321612936e-05, "loss": 0.6339921951293945, "step": 11169 }, { "epoch": 13.705521472392638, "grad_norm": 0.32701241970062256, "learning_rate": 1.2132366799385766e-05, "loss": 0.643517255783081, "step": 11170 }, { "epoch": 13.706748466257668, "grad_norm": 0.28207382559776306, "learning_rate": 1.212806379581618e-05, "loss": 0.8094348907470703, "step": 11171 }, { "epoch": 13.7079754601227, "grad_norm": 0.2752991318702698, "learning_rate": 1.2123761311077633e-05, "loss": 0.628974199295044, "step": 11172 }, { "epoch": 13.70920245398773, "grad_norm": 0.2629593014717102, "learning_rate": 1.2119459345343543e-05, "loss": 0.693202018737793, "step": 11173 }, { "epoch": 13.71042944785276, "grad_norm": 0.2736702561378479, "learning_rate": 1.211515789878731e-05, "loss": 0.7234095335006714, "step": 11174 }, { "epoch": 13.71165644171779, "grad_norm": 0.24928535521030426, "learning_rate": 1.2110856971582321e-05, "loss": 0.6605188250541687, "step": 11175 }, { "epoch": 13.712883435582823, "grad_norm": 0.30047038197517395, "learning_rate": 1.2106556563901918e-05, "loss": 0.7421045303344727, "step": 11176 }, { "epoch": 13.714110429447853, "grad_norm": 0.27573156356811523, "learning_rate": 1.2102256675919448e-05, "loss": 0.6657774448394775, "step": 11177 }, { "epoch": 13.715337423312883, "grad_norm": 0.26453346014022827, "learning_rate": 1.2097957307808216e-05, "loss": 0.7514351606369019, "step": 11178 }, { "epoch": 13.716564417177914, "grad_norm": 0.24685053527355194, "learning_rate": 1.2093658459741544e-05, "loss": 0.6476011276245117, "step": 11179 }, { "epoch": 13.717791411042946, "grad_norm": 0.5599517822265625, "learning_rate": 1.2089360131892683e-05, "loss": 0.91633141040802, "step": 11180 }, { "epoch": 13.719018404907976, "grad_norm": 0.27373573184013367, "learning_rate": 1.208506232443489e-05, "loss": 0.6789394617080688, "step": 11181 }, { "epoch": 13.720245398773006, "grad_norm": 0.313525915145874, "learning_rate": 1.2080765037541402e-05, "loss": 0.6939299702644348, "step": 11182 }, { "epoch": 13.721472392638036, "grad_norm": 0.29125094413757324, "learning_rate": 1.207646827138543e-05, "loss": 0.6619285941123962, "step": 11183 }, { "epoch": 13.722699386503068, "grad_norm": 0.25620099902153015, "learning_rate": 1.2072172026140168e-05, "loss": 0.5053766965866089, "step": 11184 }, { "epoch": 13.723926380368098, "grad_norm": 0.23941946029663086, "learning_rate": 1.2067876301978775e-05, "loss": 0.6624912023544312, "step": 11185 }, { "epoch": 13.725153374233129, "grad_norm": 0.22545742988586426, "learning_rate": 1.2063581099074404e-05, "loss": 0.5522646903991699, "step": 11186 }, { "epoch": 13.726380368098159, "grad_norm": 0.28097546100616455, "learning_rate": 1.2059286417600179e-05, "loss": 0.6008327007293701, "step": 11187 }, { "epoch": 13.72760736196319, "grad_norm": 2.3979382514953613, "learning_rate": 1.2054992257729214e-05, "loss": 0.5357536673545837, "step": 11188 }, { "epoch": 13.728834355828221, "grad_norm": 0.26489683985710144, "learning_rate": 1.20506986196346e-05, "loss": 0.6990901231765747, "step": 11189 }, { "epoch": 13.730061349693251, "grad_norm": 0.2985924482345581, "learning_rate": 1.2046405503489383e-05, "loss": 0.6452101469039917, "step": 11190 }, { "epoch": 13.731288343558282, "grad_norm": 0.23300859332084656, "learning_rate": 1.2042112909466616e-05, "loss": 0.5553978681564331, "step": 11191 }, { "epoch": 13.732515337423314, "grad_norm": 0.23416252434253693, "learning_rate": 1.2037820837739317e-05, "loss": 0.6674107313156128, "step": 11192 }, { "epoch": 13.733742331288344, "grad_norm": 0.2858032286167145, "learning_rate": 1.2033529288480494e-05, "loss": 0.45957815647125244, "step": 11193 }, { "epoch": 13.734969325153374, "grad_norm": 0.29493868350982666, "learning_rate": 1.2029238261863131e-05, "loss": 0.48533952236175537, "step": 11194 }, { "epoch": 13.736196319018404, "grad_norm": 0.35175514221191406, "learning_rate": 1.2024947758060174e-05, "loss": 0.436396986246109, "step": 11195 }, { "epoch": 13.737423312883436, "grad_norm": 0.31106898188591003, "learning_rate": 1.2020657777244562e-05, "loss": 0.6006264686584473, "step": 11196 }, { "epoch": 13.738650306748466, "grad_norm": 0.27821701765060425, "learning_rate": 1.2016368319589222e-05, "loss": 0.6297906041145325, "step": 11197 }, { "epoch": 13.739877300613497, "grad_norm": 0.2653171718120575, "learning_rate": 1.2012079385267041e-05, "loss": 0.7379478216171265, "step": 11198 }, { "epoch": 13.741104294478527, "grad_norm": 0.2623804211616516, "learning_rate": 1.2007790974450909e-05, "loss": 0.5664676427841187, "step": 11199 }, { "epoch": 13.742331288343559, "grad_norm": 0.3013066351413727, "learning_rate": 1.2003503087313645e-05, "loss": 0.49103260040283203, "step": 11200 }, { "epoch": 13.743558282208589, "grad_norm": 0.30448421835899353, "learning_rate": 1.1999215724028118e-05, "loss": 0.6317253112792969, "step": 11201 }, { "epoch": 13.74478527607362, "grad_norm": 0.28640592098236084, "learning_rate": 1.1994928884767122e-05, "loss": 0.7111399173736572, "step": 11202 }, { "epoch": 13.74601226993865, "grad_norm": 0.298176646232605, "learning_rate": 1.1990642569703464e-05, "loss": 0.8506330251693726, "step": 11203 }, { "epoch": 13.747239263803682, "grad_norm": 0.30699238181114197, "learning_rate": 1.1986356779009888e-05, "loss": 0.6437555551528931, "step": 11204 }, { "epoch": 13.748466257668712, "grad_norm": 0.3166787028312683, "learning_rate": 1.1982071512859158e-05, "loss": 0.6336507201194763, "step": 11205 }, { "epoch": 13.749693251533742, "grad_norm": 0.30455654859542847, "learning_rate": 1.1977786771423994e-05, "loss": 0.5546244382858276, "step": 11206 }, { "epoch": 13.750920245398772, "grad_norm": 0.28467056155204773, "learning_rate": 1.1973502554877106e-05, "loss": 0.6485801935195923, "step": 11207 }, { "epoch": 13.752147239263804, "grad_norm": 0.2618934214115143, "learning_rate": 1.1969218863391176e-05, "loss": 0.5267326831817627, "step": 11208 }, { "epoch": 13.753374233128834, "grad_norm": 0.2859016954898834, "learning_rate": 1.1964935697138878e-05, "loss": 0.5438745617866516, "step": 11209 }, { "epoch": 13.754601226993865, "grad_norm": 0.4233343303203583, "learning_rate": 1.1960653056292836e-05, "loss": 0.5833085775375366, "step": 11210 }, { "epoch": 13.755828220858895, "grad_norm": 0.27691298723220825, "learning_rate": 1.195637094102568e-05, "loss": 0.6203272938728333, "step": 11211 }, { "epoch": 13.757055214723927, "grad_norm": 0.2793075740337372, "learning_rate": 1.195208935151001e-05, "loss": 0.6557212471961975, "step": 11212 }, { "epoch": 13.758282208588957, "grad_norm": 0.2791461944580078, "learning_rate": 1.1947808287918404e-05, "loss": 0.6468729972839355, "step": 11213 }, { "epoch": 13.759509202453987, "grad_norm": 0.3560229241847992, "learning_rate": 1.1943527750423431e-05, "loss": 0.7922817468643188, "step": 11214 }, { "epoch": 13.76073619631902, "grad_norm": 0.24211442470550537, "learning_rate": 1.1939247739197604e-05, "loss": 0.6518042087554932, "step": 11215 }, { "epoch": 13.76196319018405, "grad_norm": 0.29909807443618774, "learning_rate": 1.1934968254413453e-05, "loss": 0.6690322756767273, "step": 11216 }, { "epoch": 13.76319018404908, "grad_norm": 0.25557899475097656, "learning_rate": 1.1930689296243467e-05, "loss": 0.4474170506000519, "step": 11217 }, { "epoch": 13.76441717791411, "grad_norm": 0.31393367052078247, "learning_rate": 1.1926410864860122e-05, "loss": 0.7982404232025146, "step": 11218 }, { "epoch": 13.76564417177914, "grad_norm": 0.2730744481086731, "learning_rate": 1.1922132960435875e-05, "loss": 0.6136809587478638, "step": 11219 }, { "epoch": 13.766871165644172, "grad_norm": 0.26196834444999695, "learning_rate": 1.1917855583143143e-05, "loss": 0.41054022312164307, "step": 11220 }, { "epoch": 13.768098159509202, "grad_norm": 0.2639623284339905, "learning_rate": 1.1913578733154338e-05, "loss": 0.4738192558288574, "step": 11221 }, { "epoch": 13.769325153374233, "grad_norm": 0.3175335228443146, "learning_rate": 1.190930241064185e-05, "loss": 0.5403847694396973, "step": 11222 }, { "epoch": 13.770552147239265, "grad_norm": 0.2578526437282562, "learning_rate": 1.190502661577805e-05, "loss": 0.7213690876960754, "step": 11223 }, { "epoch": 13.771779141104295, "grad_norm": 0.3544798791408539, "learning_rate": 1.1900751348735276e-05, "loss": 0.6135430335998535, "step": 11224 }, { "epoch": 13.773006134969325, "grad_norm": 0.2738911807537079, "learning_rate": 1.1896476609685855e-05, "loss": 0.6235277652740479, "step": 11225 }, { "epoch": 13.774233128834355, "grad_norm": 0.34403660893440247, "learning_rate": 1.189220239880209e-05, "loss": 0.5894622802734375, "step": 11226 }, { "epoch": 13.775460122699386, "grad_norm": 0.2662353515625, "learning_rate": 1.1887928716256263e-05, "loss": 0.7540881633758545, "step": 11227 }, { "epoch": 13.776687116564418, "grad_norm": 0.2668446898460388, "learning_rate": 1.188365556222064e-05, "loss": 0.6529636383056641, "step": 11228 }, { "epoch": 13.777914110429448, "grad_norm": 0.27332666516304016, "learning_rate": 1.1879382936867448e-05, "loss": 0.5621218681335449, "step": 11229 }, { "epoch": 13.779141104294478, "grad_norm": 0.26813599467277527, "learning_rate": 1.1875110840368902e-05, "loss": 0.6024810075759888, "step": 11230 }, { "epoch": 13.78036809815951, "grad_norm": 0.28398096561431885, "learning_rate": 1.1870839272897209e-05, "loss": 0.4689868092536926, "step": 11231 }, { "epoch": 13.78159509202454, "grad_norm": 0.2809564769268036, "learning_rate": 1.1866568234624537e-05, "loss": 0.5776023864746094, "step": 11232 }, { "epoch": 13.78282208588957, "grad_norm": 0.2674417197704315, "learning_rate": 1.1862297725723052e-05, "loss": 0.5718135833740234, "step": 11233 }, { "epoch": 13.7840490797546, "grad_norm": 0.2713136672973633, "learning_rate": 1.1858027746364867e-05, "loss": 0.8404952883720398, "step": 11234 }, { "epoch": 13.785276073619633, "grad_norm": 0.30122795701026917, "learning_rate": 1.1853758296722101e-05, "loss": 0.537402868270874, "step": 11235 }, { "epoch": 13.786503067484663, "grad_norm": 0.2772538959980011, "learning_rate": 1.184948937696684e-05, "loss": 0.6184855699539185, "step": 11236 }, { "epoch": 13.787730061349693, "grad_norm": 0.2740979790687561, "learning_rate": 1.184522098727116e-05, "loss": 0.6535441875457764, "step": 11237 }, { "epoch": 13.788957055214723, "grad_norm": 0.32275059819221497, "learning_rate": 1.1840953127807115e-05, "loss": 0.5823701620101929, "step": 11238 }, { "epoch": 13.790184049079755, "grad_norm": 0.22883550822734833, "learning_rate": 1.1836685798746704e-05, "loss": 0.48521286249160767, "step": 11239 }, { "epoch": 13.791411042944786, "grad_norm": 0.2796188294887543, "learning_rate": 1.1832419000261949e-05, "loss": 0.7363717555999756, "step": 11240 }, { "epoch": 13.792638036809816, "grad_norm": 0.2833576500415802, "learning_rate": 1.1828152732524828e-05, "loss": 0.4782877564430237, "step": 11241 }, { "epoch": 13.793865030674846, "grad_norm": 0.23804792761802673, "learning_rate": 1.1823886995707303e-05, "loss": 0.5282343626022339, "step": 11242 }, { "epoch": 13.795092024539878, "grad_norm": 0.3144821524620056, "learning_rate": 1.1819621789981323e-05, "loss": 0.43331971764564514, "step": 11243 }, { "epoch": 13.796319018404908, "grad_norm": 0.2504515051841736, "learning_rate": 1.1815357115518788e-05, "loss": 0.658096432685852, "step": 11244 }, { "epoch": 13.797546012269938, "grad_norm": 0.2784116566181183, "learning_rate": 1.1811092972491594e-05, "loss": 0.5869176983833313, "step": 11245 }, { "epoch": 13.798773006134969, "grad_norm": 0.2953088581562042, "learning_rate": 1.1806829361071634e-05, "loss": 0.457263708114624, "step": 11246 }, { "epoch": 13.8, "grad_norm": 0.33014976978302, "learning_rate": 1.1802566281430766e-05, "loss": 0.3210085928440094, "step": 11247 }, { "epoch": 13.801226993865031, "grad_norm": 0.2576883137226105, "learning_rate": 1.1798303733740802e-05, "loss": 0.5331533551216125, "step": 11248 }, { "epoch": 13.802453987730061, "grad_norm": 0.28492218255996704, "learning_rate": 1.179404171817356e-05, "loss": 0.6443703174591064, "step": 11249 }, { "epoch": 13.803680981595091, "grad_norm": 0.2551546096801758, "learning_rate": 1.1789780234900836e-05, "loss": 0.6962275505065918, "step": 11250 }, { "epoch": 13.804907975460123, "grad_norm": 0.2611805200576782, "learning_rate": 1.178551928409439e-05, "loss": 0.634803056716919, "step": 11251 }, { "epoch": 13.806134969325154, "grad_norm": 0.2755773067474365, "learning_rate": 1.1781258865925982e-05, "loss": 0.5375231504440308, "step": 11252 }, { "epoch": 13.807361963190184, "grad_norm": 0.2807263135910034, "learning_rate": 1.1776998980567319e-05, "loss": 0.519310474395752, "step": 11253 }, { "epoch": 13.808588957055214, "grad_norm": 0.2567072808742523, "learning_rate": 1.1772739628190117e-05, "loss": 0.3927823305130005, "step": 11254 }, { "epoch": 13.809815950920246, "grad_norm": 0.27398884296417236, "learning_rate": 1.1768480808966051e-05, "loss": 0.6911835670471191, "step": 11255 }, { "epoch": 13.811042944785276, "grad_norm": 0.25879549980163574, "learning_rate": 1.1764222523066789e-05, "loss": 0.6457359194755554, "step": 11256 }, { "epoch": 13.812269938650306, "grad_norm": 0.2529967427253723, "learning_rate": 1.1759964770663975e-05, "loss": 0.6318457126617432, "step": 11257 }, { "epoch": 13.813496932515337, "grad_norm": 0.2236788272857666, "learning_rate": 1.1755707551929212e-05, "loss": 0.6847447156906128, "step": 11258 }, { "epoch": 13.814723926380369, "grad_norm": 0.32016223669052124, "learning_rate": 1.1751450867034103e-05, "loss": 0.3965871334075928, "step": 11259 }, { "epoch": 13.815950920245399, "grad_norm": 0.2587730884552002, "learning_rate": 1.1747194716150223e-05, "loss": 0.474867582321167, "step": 11260 }, { "epoch": 13.81717791411043, "grad_norm": 0.24959991872310638, "learning_rate": 1.1742939099449126e-05, "loss": 0.6292296648025513, "step": 11261 }, { "epoch": 13.81840490797546, "grad_norm": 0.25478947162628174, "learning_rate": 1.1738684017102352e-05, "loss": 0.7611986398696899, "step": 11262 }, { "epoch": 13.819631901840491, "grad_norm": 0.28339675068855286, "learning_rate": 1.1734429469281394e-05, "loss": 0.6896242499351501, "step": 11263 }, { "epoch": 13.820858895705522, "grad_norm": 0.29015281796455383, "learning_rate": 1.1730175456157752e-05, "loss": 0.599306583404541, "step": 11264 }, { "epoch": 13.822085889570552, "grad_norm": 0.2880566120147705, "learning_rate": 1.1725921977902887e-05, "loss": 0.5063973665237427, "step": 11265 }, { "epoch": 13.823312883435582, "grad_norm": 0.25441795587539673, "learning_rate": 1.172166903468825e-05, "loss": 0.5458641052246094, "step": 11266 }, { "epoch": 13.824539877300614, "grad_norm": 0.23280037939548492, "learning_rate": 1.1717416626685262e-05, "loss": 0.7070562243461609, "step": 11267 }, { "epoch": 13.825766871165644, "grad_norm": 0.3803795278072357, "learning_rate": 1.1713164754065328e-05, "loss": 0.49737682938575745, "step": 11268 }, { "epoch": 13.826993865030675, "grad_norm": 0.2172265499830246, "learning_rate": 1.1708913416999829e-05, "loss": 0.5243861675262451, "step": 11269 }, { "epoch": 13.828220858895705, "grad_norm": 0.23642417788505554, "learning_rate": 1.170466261566012e-05, "loss": 0.5575158596038818, "step": 11270 }, { "epoch": 13.829447852760737, "grad_norm": 0.3467501997947693, "learning_rate": 1.1700412350217551e-05, "loss": 0.6193557977676392, "step": 11271 }, { "epoch": 13.830674846625767, "grad_norm": 0.30424579977989197, "learning_rate": 1.169616262084342e-05, "loss": 0.7014033794403076, "step": 11272 }, { "epoch": 13.831901840490797, "grad_norm": 0.37346628308296204, "learning_rate": 1.169191342770903e-05, "loss": 0.37626928091049194, "step": 11273 }, { "epoch": 13.833128834355827, "grad_norm": 0.27071163058280945, "learning_rate": 1.1687664770985655e-05, "loss": 0.6046661138534546, "step": 11274 }, { "epoch": 13.83435582822086, "grad_norm": 0.29053929448127747, "learning_rate": 1.1683416650844544e-05, "loss": 0.5415729880332947, "step": 11275 }, { "epoch": 13.83558282208589, "grad_norm": 0.3365718722343445, "learning_rate": 1.1679169067456935e-05, "loss": 0.4602961540222168, "step": 11276 }, { "epoch": 13.83680981595092, "grad_norm": 0.2386351078748703, "learning_rate": 1.1674922020994022e-05, "loss": 0.4658939242362976, "step": 11277 }, { "epoch": 13.83803680981595, "grad_norm": 0.3175267279148102, "learning_rate": 1.1670675511626999e-05, "loss": 0.606932520866394, "step": 11278 }, { "epoch": 13.839263803680982, "grad_norm": 0.26643314957618713, "learning_rate": 1.1666429539527026e-05, "loss": 0.58885657787323, "step": 11279 }, { "epoch": 13.840490797546012, "grad_norm": 0.2500292658805847, "learning_rate": 1.1662184104865253e-05, "loss": 0.6008058786392212, "step": 11280 }, { "epoch": 13.841717791411043, "grad_norm": 0.2698686718940735, "learning_rate": 1.1657939207812807e-05, "loss": 0.5480670928955078, "step": 11281 }, { "epoch": 13.842944785276075, "grad_norm": 0.22616279125213623, "learning_rate": 1.165369484854077e-05, "loss": 0.4581090211868286, "step": 11282 }, { "epoch": 13.844171779141105, "grad_norm": 0.27951809763908386, "learning_rate": 1.1649451027220226e-05, "loss": 0.4848412871360779, "step": 11283 }, { "epoch": 13.845398773006135, "grad_norm": 0.3295670449733734, "learning_rate": 1.1645207744022236e-05, "loss": 0.6735924482345581, "step": 11284 }, { "epoch": 13.846625766871165, "grad_norm": 0.303130179643631, "learning_rate": 1.1640964999117832e-05, "loss": 0.5541064739227295, "step": 11285 }, { "epoch": 13.847852760736195, "grad_norm": 0.35078057646751404, "learning_rate": 1.1636722792678037e-05, "loss": 0.630448579788208, "step": 11286 }, { "epoch": 13.849079754601227, "grad_norm": 0.26983121037483215, "learning_rate": 1.1632481124873826e-05, "loss": 0.7808576822280884, "step": 11287 }, { "epoch": 13.850306748466258, "grad_norm": 0.24309203028678894, "learning_rate": 1.1628239995876172e-05, "loss": 0.5520156621932983, "step": 11288 }, { "epoch": 13.851533742331288, "grad_norm": 0.2518177032470703, "learning_rate": 1.1623999405856018e-05, "loss": 0.7427421808242798, "step": 11289 }, { "epoch": 13.85276073619632, "grad_norm": 0.26545828580856323, "learning_rate": 1.161975935498432e-05, "loss": 0.6879348754882812, "step": 11290 }, { "epoch": 13.85398773006135, "grad_norm": 0.2810609042644501, "learning_rate": 1.1615519843431949e-05, "loss": 0.6566874980926514, "step": 11291 }, { "epoch": 13.85521472392638, "grad_norm": 0.2541888952255249, "learning_rate": 1.1611280871369798e-05, "loss": 0.5680255889892578, "step": 11292 }, { "epoch": 13.85644171779141, "grad_norm": 0.2529528737068176, "learning_rate": 1.1607042438968732e-05, "loss": 0.6929748058319092, "step": 11293 }, { "epoch": 13.857668711656443, "grad_norm": 0.2963208556175232, "learning_rate": 1.1602804546399585e-05, "loss": 0.44574466347694397, "step": 11294 }, { "epoch": 13.858895705521473, "grad_norm": 0.24044238030910492, "learning_rate": 1.1598567193833187e-05, "loss": 0.6486616730690002, "step": 11295 }, { "epoch": 13.860122699386503, "grad_norm": 0.2500635087490082, "learning_rate": 1.1594330381440312e-05, "loss": 0.6053789258003235, "step": 11296 }, { "epoch": 13.861349693251533, "grad_norm": 0.30534276366233826, "learning_rate": 1.159009410939175e-05, "loss": 0.6994079351425171, "step": 11297 }, { "epoch": 13.862576687116565, "grad_norm": 0.25846540927886963, "learning_rate": 1.1585858377858244e-05, "loss": 0.6745092868804932, "step": 11298 }, { "epoch": 13.863803680981595, "grad_norm": 0.38634634017944336, "learning_rate": 1.1581623187010532e-05, "loss": 0.6826267242431641, "step": 11299 }, { "epoch": 13.865030674846626, "grad_norm": 0.30680936574935913, "learning_rate": 1.1577388537019326e-05, "loss": 0.5253598690032959, "step": 11300 }, { "epoch": 13.866257668711656, "grad_norm": 0.26572132110595703, "learning_rate": 1.1573154428055297e-05, "loss": 0.5193166732788086, "step": 11301 }, { "epoch": 13.867484662576688, "grad_norm": 0.230309396982193, "learning_rate": 1.1568920860289118e-05, "loss": 0.5680872201919556, "step": 11302 }, { "epoch": 13.868711656441718, "grad_norm": 0.2543047368526459, "learning_rate": 1.1564687833891433e-05, "loss": 0.672579288482666, "step": 11303 }, { "epoch": 13.869938650306748, "grad_norm": 0.24032314121723175, "learning_rate": 1.1560455349032861e-05, "loss": 0.4930616021156311, "step": 11304 }, { "epoch": 13.871165644171779, "grad_norm": 0.26166969537734985, "learning_rate": 1.1556223405884015e-05, "loss": 0.7033579349517822, "step": 11305 }, { "epoch": 13.87239263803681, "grad_norm": 0.23765134811401367, "learning_rate": 1.1551992004615447e-05, "loss": 0.5311694145202637, "step": 11306 }, { "epoch": 13.87361963190184, "grad_norm": 0.37240418791770935, "learning_rate": 1.1547761145397732e-05, "loss": 0.46193408966064453, "step": 11307 }, { "epoch": 13.874846625766871, "grad_norm": 0.26171255111694336, "learning_rate": 1.1543530828401395e-05, "loss": 0.5987931489944458, "step": 11308 }, { "epoch": 13.876073619631901, "grad_norm": 0.28966036438941956, "learning_rate": 1.153930105379695e-05, "loss": 0.6016228199005127, "step": 11309 }, { "epoch": 13.877300613496933, "grad_norm": 0.24218137562274933, "learning_rate": 1.1535071821754892e-05, "loss": 0.7120758295059204, "step": 11310 }, { "epoch": 13.878527607361963, "grad_norm": 0.294444739818573, "learning_rate": 1.1530843132445682e-05, "loss": 0.752862811088562, "step": 11311 }, { "epoch": 13.879754601226994, "grad_norm": 0.2687140107154846, "learning_rate": 1.1526614986039772e-05, "loss": 0.6228930950164795, "step": 11312 }, { "epoch": 13.880981595092024, "grad_norm": 0.27765628695487976, "learning_rate": 1.1522387382707586e-05, "loss": 0.7948009371757507, "step": 11313 }, { "epoch": 13.882208588957056, "grad_norm": 0.23637522757053375, "learning_rate": 1.1518160322619523e-05, "loss": 0.6600539088249207, "step": 11314 }, { "epoch": 13.883435582822086, "grad_norm": 0.27136483788490295, "learning_rate": 1.1513933805945976e-05, "loss": 0.4391419589519501, "step": 11315 }, { "epoch": 13.884662576687116, "grad_norm": 0.2663549482822418, "learning_rate": 1.1509707832857287e-05, "loss": 0.7655937671661377, "step": 11316 }, { "epoch": 13.885889570552147, "grad_norm": 0.26163676381111145, "learning_rate": 1.1505482403523795e-05, "loss": 0.4536455571651459, "step": 11317 }, { "epoch": 13.887116564417179, "grad_norm": 0.27659979462623596, "learning_rate": 1.1501257518115824e-05, "loss": 0.5430070757865906, "step": 11318 }, { "epoch": 13.888343558282209, "grad_norm": 0.271964967250824, "learning_rate": 1.149703317680366e-05, "loss": 0.7150459289550781, "step": 11319 }, { "epoch": 13.889570552147239, "grad_norm": 0.2461930364370346, "learning_rate": 1.1492809379757589e-05, "loss": 0.6844857335090637, "step": 11320 }, { "epoch": 13.89079754601227, "grad_norm": 0.2327762246131897, "learning_rate": 1.1488586127147835e-05, "loss": 0.6214553713798523, "step": 11321 }, { "epoch": 13.892024539877301, "grad_norm": 0.31361252069473267, "learning_rate": 1.1484363419144639e-05, "loss": 0.680143415927887, "step": 11322 }, { "epoch": 13.893251533742331, "grad_norm": 0.24287579953670502, "learning_rate": 1.1480141255918209e-05, "loss": 0.5601629018783569, "step": 11323 }, { "epoch": 13.894478527607362, "grad_norm": 0.318131685256958, "learning_rate": 1.1475919637638723e-05, "loss": 0.5406585931777954, "step": 11324 }, { "epoch": 13.895705521472392, "grad_norm": 0.2778862416744232, "learning_rate": 1.1471698564476354e-05, "loss": 0.5209263563156128, "step": 11325 }, { "epoch": 13.896932515337424, "grad_norm": 0.25638675689697266, "learning_rate": 1.1467478036601223e-05, "loss": 0.5099114775657654, "step": 11326 }, { "epoch": 13.898159509202454, "grad_norm": 0.28199419379234314, "learning_rate": 1.1463258054183457e-05, "loss": 0.697396993637085, "step": 11327 }, { "epoch": 13.899386503067484, "grad_norm": 0.29321736097335815, "learning_rate": 1.1459038617393151e-05, "loss": 0.48631584644317627, "step": 11328 }, { "epoch": 13.900613496932515, "grad_norm": 0.23566098511219025, "learning_rate": 1.1454819726400376e-05, "loss": 0.6766301989555359, "step": 11329 }, { "epoch": 13.901840490797547, "grad_norm": 0.2574799656867981, "learning_rate": 1.1450601381375198e-05, "loss": 0.7723990082740784, "step": 11330 }, { "epoch": 13.903067484662577, "grad_norm": 0.25105640292167664, "learning_rate": 1.1446383582487622e-05, "loss": 0.37631407380104065, "step": 11331 }, { "epoch": 13.904294478527607, "grad_norm": 0.2907632887363434, "learning_rate": 1.144216632990767e-05, "loss": 0.46474409103393555, "step": 11332 }, { "epoch": 13.905521472392637, "grad_norm": 0.27017778158187866, "learning_rate": 1.1437949623805325e-05, "loss": 0.48842760920524597, "step": 11333 }, { "epoch": 13.90674846625767, "grad_norm": 0.2703027129173279, "learning_rate": 1.1433733464350549e-05, "loss": 0.7497262358665466, "step": 11334 }, { "epoch": 13.9079754601227, "grad_norm": 0.314117968082428, "learning_rate": 1.1429517851713283e-05, "loss": 0.5451369881629944, "step": 11335 }, { "epoch": 13.90920245398773, "grad_norm": 0.24199797213077545, "learning_rate": 1.1425302786063452e-05, "loss": 0.6798047423362732, "step": 11336 }, { "epoch": 13.91042944785276, "grad_norm": 0.2541612386703491, "learning_rate": 1.1421088267570947e-05, "loss": 0.5991063117980957, "step": 11337 }, { "epoch": 13.911656441717792, "grad_norm": 0.27546247839927673, "learning_rate": 1.1416874296405649e-05, "loss": 0.47934556007385254, "step": 11338 }, { "epoch": 13.912883435582822, "grad_norm": 0.2871321737766266, "learning_rate": 1.1412660872737416e-05, "loss": 0.6763548851013184, "step": 11339 }, { "epoch": 13.914110429447852, "grad_norm": 0.2927126884460449, "learning_rate": 1.140844799673606e-05, "loss": 0.5786142349243164, "step": 11340 }, { "epoch": 13.915337423312884, "grad_norm": 0.26364555954933167, "learning_rate": 1.14042356685714e-05, "loss": 0.5088212490081787, "step": 11341 }, { "epoch": 13.916564417177915, "grad_norm": 0.26277437806129456, "learning_rate": 1.1400023888413225e-05, "loss": 0.3858678340911865, "step": 11342 }, { "epoch": 13.917791411042945, "grad_norm": 0.27017807960510254, "learning_rate": 1.1395812656431301e-05, "loss": 0.8784078359603882, "step": 11343 }, { "epoch": 13.919018404907975, "grad_norm": 0.28078508377075195, "learning_rate": 1.1391601972795377e-05, "loss": 0.40468937158584595, "step": 11344 }, { "epoch": 13.920245398773005, "grad_norm": 0.25039753317832947, "learning_rate": 1.1387391837675152e-05, "loss": 0.5166531205177307, "step": 11345 }, { "epoch": 13.921472392638037, "grad_norm": 0.2660287916660309, "learning_rate": 1.138318225124034e-05, "loss": 0.8369498252868652, "step": 11346 }, { "epoch": 13.922699386503067, "grad_norm": 0.29409751296043396, "learning_rate": 1.1378973213660615e-05, "loss": 0.5976976156234741, "step": 11347 }, { "epoch": 13.923926380368098, "grad_norm": 0.24553845822811127, "learning_rate": 1.1374764725105632e-05, "loss": 0.5461355447769165, "step": 11348 }, { "epoch": 13.92515337423313, "grad_norm": 0.27329960465431213, "learning_rate": 1.1370556785745032e-05, "loss": 0.5994942784309387, "step": 11349 }, { "epoch": 13.92638036809816, "grad_norm": 0.2678985893726349, "learning_rate": 1.1366349395748407e-05, "loss": 0.7128844261169434, "step": 11350 }, { "epoch": 13.92760736196319, "grad_norm": 0.24808533489704132, "learning_rate": 1.1362142555285351e-05, "loss": 0.6100072860717773, "step": 11351 }, { "epoch": 13.92883435582822, "grad_norm": 0.30575037002563477, "learning_rate": 1.1357936264525435e-05, "loss": 0.4967203736305237, "step": 11352 }, { "epoch": 13.93006134969325, "grad_norm": 0.2630953788757324, "learning_rate": 1.1353730523638199e-05, "loss": 0.6560882329940796, "step": 11353 }, { "epoch": 13.931288343558283, "grad_norm": 0.25511571764945984, "learning_rate": 1.1349525332793174e-05, "loss": 0.6349400877952576, "step": 11354 }, { "epoch": 13.932515337423313, "grad_norm": 0.24690045416355133, "learning_rate": 1.1345320692159838e-05, "loss": 0.4488574266433716, "step": 11355 }, { "epoch": 13.933742331288343, "grad_norm": 0.274191677570343, "learning_rate": 1.1341116601907675e-05, "loss": 0.5012326240539551, "step": 11356 }, { "epoch": 13.934969325153375, "grad_norm": 0.28957903385162354, "learning_rate": 1.1336913062206156e-05, "loss": 0.3908459544181824, "step": 11357 }, { "epoch": 13.936196319018405, "grad_norm": 0.2617536187171936, "learning_rate": 1.1332710073224709e-05, "loss": 0.7396116256713867, "step": 11358 }, { "epoch": 13.937423312883435, "grad_norm": 0.3461896479129791, "learning_rate": 1.1328507635132732e-05, "loss": 0.5495182275772095, "step": 11359 }, { "epoch": 13.938650306748466, "grad_norm": 0.29344242811203003, "learning_rate": 1.1324305748099617e-05, "loss": 0.6314305067062378, "step": 11360 }, { "epoch": 13.939877300613498, "grad_norm": 0.34297749400138855, "learning_rate": 1.1320104412294735e-05, "loss": 0.6537272930145264, "step": 11361 }, { "epoch": 13.941104294478528, "grad_norm": 0.2517843246459961, "learning_rate": 1.1315903627887429e-05, "loss": 0.6746444702148438, "step": 11362 }, { "epoch": 13.942331288343558, "grad_norm": 0.3233439028263092, "learning_rate": 1.1311703395047027e-05, "loss": 0.4968644976615906, "step": 11363 }, { "epoch": 13.943558282208588, "grad_norm": 0.3354615569114685, "learning_rate": 1.130750371394281e-05, "loss": 0.6938323974609375, "step": 11364 }, { "epoch": 13.94478527607362, "grad_norm": 0.3072521984577179, "learning_rate": 1.130330458474407e-05, "loss": 0.5741609334945679, "step": 11365 }, { "epoch": 13.94601226993865, "grad_norm": 0.28587549924850464, "learning_rate": 1.1299106007620055e-05, "loss": 0.566665530204773, "step": 11366 }, { "epoch": 13.94723926380368, "grad_norm": 0.27502545714378357, "learning_rate": 1.1294907982740002e-05, "loss": 0.5396663546562195, "step": 11367 }, { "epoch": 13.948466257668711, "grad_norm": 0.23473751544952393, "learning_rate": 1.1290710510273128e-05, "loss": 0.6312269568443298, "step": 11368 }, { "epoch": 13.949693251533743, "grad_norm": 0.2830473780632019, "learning_rate": 1.1286513590388607e-05, "loss": 0.6556686162948608, "step": 11369 }, { "epoch": 13.950920245398773, "grad_norm": 0.26940083503723145, "learning_rate": 1.1282317223255606e-05, "loss": 0.48672837018966675, "step": 11370 }, { "epoch": 13.952147239263804, "grad_norm": 0.2986363470554352, "learning_rate": 1.1278121409043276e-05, "loss": 0.5601890683174133, "step": 11371 }, { "epoch": 13.953374233128834, "grad_norm": 0.20412231981754303, "learning_rate": 1.1273926147920738e-05, "loss": 0.5476642847061157, "step": 11372 }, { "epoch": 13.954601226993866, "grad_norm": 0.23055236041545868, "learning_rate": 1.1269731440057097e-05, "loss": 0.6301791667938232, "step": 11373 }, { "epoch": 13.955828220858896, "grad_norm": 0.32836848497390747, "learning_rate": 1.1265537285621414e-05, "loss": 0.8041034936904907, "step": 11374 }, { "epoch": 13.957055214723926, "grad_norm": 0.2679445445537567, "learning_rate": 1.1261343684782749e-05, "loss": 0.6431858539581299, "step": 11375 }, { "epoch": 13.958282208588956, "grad_norm": 0.261491984128952, "learning_rate": 1.1257150637710136e-05, "loss": 0.5809481143951416, "step": 11376 }, { "epoch": 13.959509202453988, "grad_norm": 0.2730907201766968, "learning_rate": 1.1252958144572587e-05, "loss": 0.7168201208114624, "step": 11377 }, { "epoch": 13.960736196319019, "grad_norm": 0.3530980348587036, "learning_rate": 1.1248766205539086e-05, "loss": 0.6088079214096069, "step": 11378 }, { "epoch": 13.961963190184049, "grad_norm": 0.2683124244213104, "learning_rate": 1.1244574820778597e-05, "loss": 0.5347650051116943, "step": 11379 }, { "epoch": 13.963190184049079, "grad_norm": 0.268298476934433, "learning_rate": 1.1240383990460069e-05, "loss": 0.5809836387634277, "step": 11380 }, { "epoch": 13.964417177914111, "grad_norm": 0.2990734279155731, "learning_rate": 1.1236193714752417e-05, "loss": 0.5030883550643921, "step": 11381 }, { "epoch": 13.965644171779141, "grad_norm": 0.2739100456237793, "learning_rate": 1.123200399382455e-05, "loss": 0.539035439491272, "step": 11382 }, { "epoch": 13.966871165644172, "grad_norm": 0.5871991515159607, "learning_rate": 1.1227814827845326e-05, "loss": 0.8004428148269653, "step": 11383 }, { "epoch": 13.968098159509202, "grad_norm": 0.29784509539604187, "learning_rate": 1.1223626216983607e-05, "loss": 0.7137231826782227, "step": 11384 }, { "epoch": 13.969325153374234, "grad_norm": 0.23973704874515533, "learning_rate": 1.1219438161408222e-05, "loss": 0.6346318125724792, "step": 11385 }, { "epoch": 13.970552147239264, "grad_norm": 0.29235437512397766, "learning_rate": 1.1215250661287984e-05, "loss": 0.6612898111343384, "step": 11386 }, { "epoch": 13.971779141104294, "grad_norm": 0.2688601016998291, "learning_rate": 1.1211063716791683e-05, "loss": 0.5551872849464417, "step": 11387 }, { "epoch": 13.973006134969324, "grad_norm": 0.23407459259033203, "learning_rate": 1.120687732808807e-05, "loss": 0.527056097984314, "step": 11388 }, { "epoch": 13.974233128834356, "grad_norm": 0.2584964334964752, "learning_rate": 1.120269149534589e-05, "loss": 0.668910026550293, "step": 11389 }, { "epoch": 13.975460122699387, "grad_norm": 0.25031477212905884, "learning_rate": 1.1198506218733864e-05, "loss": 0.5531009435653687, "step": 11390 }, { "epoch": 13.976687116564417, "grad_norm": 0.31552854180336, "learning_rate": 1.1194321498420693e-05, "loss": 0.7542461156845093, "step": 11391 }, { "epoch": 13.977914110429447, "grad_norm": 0.29968956112861633, "learning_rate": 1.1190137334575054e-05, "loss": 0.8321963548660278, "step": 11392 }, { "epoch": 13.979141104294479, "grad_norm": 0.2557520866394043, "learning_rate": 1.1185953727365583e-05, "loss": 0.7274836897850037, "step": 11393 }, { "epoch": 13.98036809815951, "grad_norm": 0.2736785113811493, "learning_rate": 1.118177067696092e-05, "loss": 0.5285515785217285, "step": 11394 }, { "epoch": 13.98159509202454, "grad_norm": 0.2677626311779022, "learning_rate": 1.117758818352967e-05, "loss": 0.7150754928588867, "step": 11395 }, { "epoch": 13.98282208588957, "grad_norm": 0.27278417348861694, "learning_rate": 1.1173406247240415e-05, "loss": 0.6408794522285461, "step": 11396 }, { "epoch": 13.984049079754602, "grad_norm": 0.26714780926704407, "learning_rate": 1.116922486826173e-05, "loss": 0.4827502369880676, "step": 11397 }, { "epoch": 13.985276073619632, "grad_norm": 0.26287102699279785, "learning_rate": 1.1165044046762135e-05, "loss": 0.6150903701782227, "step": 11398 }, { "epoch": 13.986503067484662, "grad_norm": 0.3036004602909088, "learning_rate": 1.1160863782910155e-05, "loss": 0.5593575239181519, "step": 11399 }, { "epoch": 13.987730061349692, "grad_norm": 0.2505505681037903, "learning_rate": 1.1156684076874277e-05, "loss": 0.6768448352813721, "step": 11400 }, { "epoch": 13.988957055214724, "grad_norm": 0.2709074020385742, "learning_rate": 1.1152504928823002e-05, "loss": 0.67838054895401, "step": 11401 }, { "epoch": 13.990184049079755, "grad_norm": 0.29103562235832214, "learning_rate": 1.1148326338924747e-05, "loss": 0.626711368560791, "step": 11402 }, { "epoch": 13.991411042944785, "grad_norm": 0.24182921648025513, "learning_rate": 1.1144148307347952e-05, "loss": 0.6507073640823364, "step": 11403 }, { "epoch": 13.992638036809815, "grad_norm": 0.2970719039440155, "learning_rate": 1.1139970834261022e-05, "loss": 0.34116798639297485, "step": 11404 }, { "epoch": 13.993865030674847, "grad_norm": 0.26064351201057434, "learning_rate": 1.1135793919832336e-05, "loss": 0.7453627586364746, "step": 11405 }, { "epoch": 13.995092024539877, "grad_norm": 0.26690685749053955, "learning_rate": 1.1131617564230263e-05, "loss": 0.5442085266113281, "step": 11406 }, { "epoch": 13.996319018404908, "grad_norm": 0.2531208097934723, "learning_rate": 1.1127441767623122e-05, "loss": 0.5016471147537231, "step": 11407 }, { "epoch": 13.99754601226994, "grad_norm": 0.28734344244003296, "learning_rate": 1.1123266530179241e-05, "loss": 0.6317758560180664, "step": 11408 }, { "epoch": 13.99877300613497, "grad_norm": 0.23886770009994507, "learning_rate": 1.111909185206691e-05, "loss": 0.59904944896698, "step": 11409 }, { "epoch": 14.0, "grad_norm": 0.2701731026172638, "learning_rate": 1.1114917733454393e-05, "loss": 0.7054635882377625, "step": 11410 }, { "epoch": 14.00122699386503, "grad_norm": 0.22998470067977905, "learning_rate": 1.1110744174509952e-05, "loss": 0.48845475912094116, "step": 11411 }, { "epoch": 14.002453987730062, "grad_norm": 0.2544991672039032, "learning_rate": 1.1106571175401789e-05, "loss": 0.6061588525772095, "step": 11412 }, { "epoch": 14.003680981595092, "grad_norm": 0.2110055387020111, "learning_rate": 1.1102398736298116e-05, "loss": 0.4677850008010864, "step": 11413 }, { "epoch": 14.004907975460123, "grad_norm": 0.24186095595359802, "learning_rate": 1.1098226857367116e-05, "loss": 0.5941004157066345, "step": 11414 }, { "epoch": 14.006134969325153, "grad_norm": 0.27111348509788513, "learning_rate": 1.109405553877694e-05, "loss": 0.7706423997879028, "step": 11415 }, { "epoch": 14.007361963190185, "grad_norm": 0.2483184039592743, "learning_rate": 1.1089884780695726e-05, "loss": 0.4424874484539032, "step": 11416 }, { "epoch": 14.008588957055215, "grad_norm": 0.2531687617301941, "learning_rate": 1.108571458329159e-05, "loss": 0.4119299352169037, "step": 11417 }, { "epoch": 14.009815950920245, "grad_norm": 0.24997657537460327, "learning_rate": 1.1081544946732609e-05, "loss": 0.706852912902832, "step": 11418 }, { "epoch": 14.011042944785276, "grad_norm": 0.23272758722305298, "learning_rate": 1.107737587118685e-05, "loss": 0.6292295455932617, "step": 11419 }, { "epoch": 14.012269938650308, "grad_norm": 0.2952258288860321, "learning_rate": 1.1073207356822365e-05, "loss": 0.585690975189209, "step": 11420 }, { "epoch": 14.013496932515338, "grad_norm": 0.2458692044019699, "learning_rate": 1.106903940380717e-05, "loss": 0.6705917119979858, "step": 11421 }, { "epoch": 14.014723926380368, "grad_norm": 0.23945708572864532, "learning_rate": 1.106487201230927e-05, "loss": 0.4725190997123718, "step": 11422 }, { "epoch": 14.015950920245398, "grad_norm": 0.28832387924194336, "learning_rate": 1.1060705182496622e-05, "loss": 0.8441885113716125, "step": 11423 }, { "epoch": 14.01717791411043, "grad_norm": 0.2428138554096222, "learning_rate": 1.1056538914537198e-05, "loss": 0.6667507886886597, "step": 11424 }, { "epoch": 14.01840490797546, "grad_norm": 0.25914111733436584, "learning_rate": 1.1052373208598923e-05, "loss": 0.49357742071151733, "step": 11425 }, { "epoch": 14.01963190184049, "grad_norm": 0.257437139749527, "learning_rate": 1.1048208064849714e-05, "loss": 0.7368593215942383, "step": 11426 }, { "epoch": 14.020858895705521, "grad_norm": 0.2448194921016693, "learning_rate": 1.1044043483457437e-05, "loss": 0.7178465127944946, "step": 11427 }, { "epoch": 14.022085889570553, "grad_norm": 0.4799049198627472, "learning_rate": 1.1039879464589962e-05, "loss": 0.4439043402671814, "step": 11428 }, { "epoch": 14.023312883435583, "grad_norm": 0.2905004322528839, "learning_rate": 1.1035716008415129e-05, "loss": 0.47985926270484924, "step": 11429 }, { "epoch": 14.024539877300613, "grad_norm": 0.23943911492824554, "learning_rate": 1.103155311510076e-05, "loss": 0.6652151346206665, "step": 11430 }, { "epoch": 14.025766871165644, "grad_norm": 0.27426064014434814, "learning_rate": 1.1027390784814654e-05, "loss": 0.5346325039863586, "step": 11431 }, { "epoch": 14.026993865030676, "grad_norm": 0.22370705008506775, "learning_rate": 1.1023229017724562e-05, "loss": 0.6111642718315125, "step": 11432 }, { "epoch": 14.028220858895706, "grad_norm": 0.22573643922805786, "learning_rate": 1.101906781399825e-05, "loss": 0.7162476181983948, "step": 11433 }, { "epoch": 14.029447852760736, "grad_norm": 0.26688528060913086, "learning_rate": 1.1014907173803435e-05, "loss": 0.6526927351951599, "step": 11434 }, { "epoch": 14.030674846625766, "grad_norm": 0.25115981698036194, "learning_rate": 1.1010747097307827e-05, "loss": 0.8198490142822266, "step": 11435 }, { "epoch": 14.031901840490798, "grad_norm": 0.2438686043024063, "learning_rate": 1.1006587584679115e-05, "loss": 0.6342054605484009, "step": 11436 }, { "epoch": 14.033128834355828, "grad_norm": 0.23348626494407654, "learning_rate": 1.1002428636084938e-05, "loss": 0.6164011359214783, "step": 11437 }, { "epoch": 14.034355828220859, "grad_norm": 0.23685914278030396, "learning_rate": 1.0998270251692937e-05, "loss": 0.4906509518623352, "step": 11438 }, { "epoch": 14.035582822085889, "grad_norm": 0.26000306010246277, "learning_rate": 1.0994112431670733e-05, "loss": 0.5387810468673706, "step": 11439 }, { "epoch": 14.036809815950921, "grad_norm": 0.26343220472335815, "learning_rate": 1.0989955176185906e-05, "loss": 0.6548834443092346, "step": 11440 }, { "epoch": 14.038036809815951, "grad_norm": 0.2795306146144867, "learning_rate": 1.098579848540604e-05, "loss": 0.5748295187950134, "step": 11441 }, { "epoch": 14.039263803680981, "grad_norm": 0.2987796366214752, "learning_rate": 1.0981642359498659e-05, "loss": 0.5754965543746948, "step": 11442 }, { "epoch": 14.040490797546012, "grad_norm": 0.29563629627227783, "learning_rate": 1.097748679863129e-05, "loss": 0.5677947402000427, "step": 11443 }, { "epoch": 14.041717791411044, "grad_norm": 0.23229153454303741, "learning_rate": 1.0973331802971437e-05, "loss": 0.7345472574234009, "step": 11444 }, { "epoch": 14.042944785276074, "grad_norm": 0.235984668135643, "learning_rate": 1.0969177372686574e-05, "loss": 0.6180320978164673, "step": 11445 }, { "epoch": 14.044171779141104, "grad_norm": 0.21711020171642303, "learning_rate": 1.0965023507944153e-05, "loss": 0.4419189989566803, "step": 11446 }, { "epoch": 14.045398773006134, "grad_norm": 0.2601080536842346, "learning_rate": 1.0960870208911603e-05, "loss": 0.4676133990287781, "step": 11447 }, { "epoch": 14.046625766871166, "grad_norm": 0.25065961480140686, "learning_rate": 1.0956717475756337e-05, "loss": 0.5646803975105286, "step": 11448 }, { "epoch": 14.047852760736196, "grad_norm": 0.2229679524898529, "learning_rate": 1.0952565308645738e-05, "loss": 0.665188729763031, "step": 11449 }, { "epoch": 14.049079754601227, "grad_norm": 0.30792728066444397, "learning_rate": 1.094841370774717e-05, "loss": 0.6867384910583496, "step": 11450 }, { "epoch": 14.050306748466257, "grad_norm": 0.25779542326927185, "learning_rate": 1.0944262673227967e-05, "loss": 0.6080865859985352, "step": 11451 }, { "epoch": 14.051533742331289, "grad_norm": 0.2601656913757324, "learning_rate": 1.0940112205255443e-05, "loss": 0.40170782804489136, "step": 11452 }, { "epoch": 14.05276073619632, "grad_norm": 0.24286434054374695, "learning_rate": 1.0935962303996899e-05, "loss": 0.349554181098938, "step": 11453 }, { "epoch": 14.05398773006135, "grad_norm": 0.2994990050792694, "learning_rate": 1.0931812969619601e-05, "loss": 0.613149881362915, "step": 11454 }, { "epoch": 14.05521472392638, "grad_norm": 0.2913520336151123, "learning_rate": 1.0927664202290808e-05, "loss": 0.4978853464126587, "step": 11455 }, { "epoch": 14.056441717791412, "grad_norm": 0.28578171133995056, "learning_rate": 1.0923516002177725e-05, "loss": 0.541861355304718, "step": 11456 }, { "epoch": 14.057668711656442, "grad_norm": 0.23766180872917175, "learning_rate": 1.0919368369447567e-05, "loss": 0.7089803218841553, "step": 11457 }, { "epoch": 14.058895705521472, "grad_norm": 0.28896841406822205, "learning_rate": 1.0915221304267509e-05, "loss": 0.4158972203731537, "step": 11458 }, { "epoch": 14.060122699386502, "grad_norm": 0.24375468492507935, "learning_rate": 1.0911074806804708e-05, "loss": 0.5134633183479309, "step": 11459 }, { "epoch": 14.061349693251534, "grad_norm": 0.23405712842941284, "learning_rate": 1.0906928877226311e-05, "loss": 0.5638279914855957, "step": 11460 }, { "epoch": 14.062576687116565, "grad_norm": 0.2977820932865143, "learning_rate": 1.0902783515699405e-05, "loss": 0.6638854146003723, "step": 11461 }, { "epoch": 14.063803680981595, "grad_norm": 0.2562178373336792, "learning_rate": 1.0898638722391088e-05, "loss": 0.7037655711174011, "step": 11462 }, { "epoch": 14.065030674846625, "grad_norm": 0.28017479181289673, "learning_rate": 1.0894494497468427e-05, "loss": 0.582331120967865, "step": 11463 }, { "epoch": 14.066257668711657, "grad_norm": 0.24769915640354156, "learning_rate": 1.0890350841098461e-05, "loss": 0.6148347854614258, "step": 11464 }, { "epoch": 14.067484662576687, "grad_norm": 0.2613491117954254, "learning_rate": 1.0886207753448224e-05, "loss": 0.7053505182266235, "step": 11465 }, { "epoch": 14.068711656441717, "grad_norm": 0.23862333595752716, "learning_rate": 1.0882065234684687e-05, "loss": 0.4688844084739685, "step": 11466 }, { "epoch": 14.069938650306748, "grad_norm": 0.2639999985694885, "learning_rate": 1.0877923284974825e-05, "loss": 0.8281700611114502, "step": 11467 }, { "epoch": 14.07116564417178, "grad_norm": 0.26866859197616577, "learning_rate": 1.0873781904485609e-05, "loss": 0.6617327332496643, "step": 11468 }, { "epoch": 14.07239263803681, "grad_norm": 0.2718425691127777, "learning_rate": 1.0869641093383962e-05, "loss": 0.530311107635498, "step": 11469 }, { "epoch": 14.07361963190184, "grad_norm": 0.25665178894996643, "learning_rate": 1.0865500851836774e-05, "loss": 0.689092755317688, "step": 11470 }, { "epoch": 14.07484662576687, "grad_norm": 0.25048935413360596, "learning_rate": 1.0861361180010934e-05, "loss": 0.629930317401886, "step": 11471 }, { "epoch": 14.076073619631902, "grad_norm": 0.2769034802913666, "learning_rate": 1.0857222078073299e-05, "loss": 0.7037187814712524, "step": 11472 }, { "epoch": 14.077300613496933, "grad_norm": 0.25555703043937683, "learning_rate": 1.0853083546190707e-05, "loss": 0.5199291706085205, "step": 11473 }, { "epoch": 14.078527607361963, "grad_norm": 0.2512415051460266, "learning_rate": 1.0848945584529976e-05, "loss": 0.6109989881515503, "step": 11474 }, { "epoch": 14.079754601226995, "grad_norm": 0.22660620510578156, "learning_rate": 1.084480819325788e-05, "loss": 0.5066816806793213, "step": 11475 }, { "epoch": 14.080981595092025, "grad_norm": 0.2790195643901825, "learning_rate": 1.0840671372541194e-05, "loss": 0.5798682570457458, "step": 11476 }, { "epoch": 14.082208588957055, "grad_norm": 0.31653687357902527, "learning_rate": 1.0836535122546663e-05, "loss": 0.45651936531066895, "step": 11477 }, { "epoch": 14.083435582822085, "grad_norm": 0.3325871229171753, "learning_rate": 1.0832399443441005e-05, "loss": 0.5814213752746582, "step": 11478 }, { "epoch": 14.084662576687117, "grad_norm": 0.2545170783996582, "learning_rate": 1.0828264335390924e-05, "loss": 0.5627686381340027, "step": 11479 }, { "epoch": 14.085889570552148, "grad_norm": 0.22901570796966553, "learning_rate": 1.0824129798563084e-05, "loss": 0.6652421951293945, "step": 11480 }, { "epoch": 14.087116564417178, "grad_norm": 0.2504453957080841, "learning_rate": 1.0819995833124139e-05, "loss": 0.7157544493675232, "step": 11481 }, { "epoch": 14.088343558282208, "grad_norm": 0.236867755651474, "learning_rate": 1.081586243924072e-05, "loss": 0.5972728729248047, "step": 11482 }, { "epoch": 14.08957055214724, "grad_norm": 0.2591000199317932, "learning_rate": 1.0811729617079431e-05, "loss": 0.7268708944320679, "step": 11483 }, { "epoch": 14.09079754601227, "grad_norm": 0.24402554333209991, "learning_rate": 1.0807597366806862e-05, "loss": 0.6629390716552734, "step": 11484 }, { "epoch": 14.0920245398773, "grad_norm": 0.2407207489013672, "learning_rate": 1.080346568858956e-05, "loss": 0.5825148820877075, "step": 11485 }, { "epoch": 14.09325153374233, "grad_norm": 0.3227500319480896, "learning_rate": 1.0799334582594065e-05, "loss": 0.6913478374481201, "step": 11486 }, { "epoch": 14.094478527607363, "grad_norm": 0.3104076087474823, "learning_rate": 1.079520404898689e-05, "loss": 0.6239058971405029, "step": 11487 }, { "epoch": 14.095705521472393, "grad_norm": 0.293511301279068, "learning_rate": 1.0791074087934527e-05, "loss": 0.5194659233093262, "step": 11488 }, { "epoch": 14.096932515337423, "grad_norm": 0.2516288757324219, "learning_rate": 1.0786944699603443e-05, "loss": 0.7255079746246338, "step": 11489 }, { "epoch": 14.098159509202453, "grad_norm": 0.25664955377578735, "learning_rate": 1.078281588416008e-05, "loss": 0.6085963249206543, "step": 11490 }, { "epoch": 14.099386503067485, "grad_norm": 0.2845543920993805, "learning_rate": 1.0778687641770862e-05, "loss": 0.4937312602996826, "step": 11491 }, { "epoch": 14.100613496932516, "grad_norm": 0.23317812383174896, "learning_rate": 1.0774559972602183e-05, "loss": 0.7580980062484741, "step": 11492 }, { "epoch": 14.101840490797546, "grad_norm": 0.2677284777164459, "learning_rate": 1.077043287682043e-05, "loss": 0.686177670955658, "step": 11493 }, { "epoch": 14.103067484662576, "grad_norm": 0.30821898579597473, "learning_rate": 1.0766306354591934e-05, "loss": 0.4835357666015625, "step": 11494 }, { "epoch": 14.104294478527608, "grad_norm": 0.2733663022518158, "learning_rate": 1.0762180406083033e-05, "loss": 0.44196057319641113, "step": 11495 }, { "epoch": 14.105521472392638, "grad_norm": 0.27568313479423523, "learning_rate": 1.0758055031460032e-05, "loss": 0.46132779121398926, "step": 11496 }, { "epoch": 14.106748466257669, "grad_norm": 0.2242080122232437, "learning_rate": 1.0753930230889212e-05, "loss": 0.6135105490684509, "step": 11497 }, { "epoch": 14.107975460122699, "grad_norm": 0.22288011014461517, "learning_rate": 1.0749806004536844e-05, "loss": 0.48333626985549927, "step": 11498 }, { "epoch": 14.10920245398773, "grad_norm": 0.2549131214618683, "learning_rate": 1.0745682352569145e-05, "loss": 0.7411036491394043, "step": 11499 }, { "epoch": 14.110429447852761, "grad_norm": 0.23113363981246948, "learning_rate": 1.0741559275152336e-05, "loss": 0.5533386468887329, "step": 11500 }, { "epoch": 14.111656441717791, "grad_norm": 0.29081642627716064, "learning_rate": 1.0737436772452603e-05, "loss": 0.5241289138793945, "step": 11501 }, { "epoch": 14.112883435582821, "grad_norm": 0.24584762752056122, "learning_rate": 1.0733314844636116e-05, "loss": 0.4794396162033081, "step": 11502 }, { "epoch": 14.114110429447853, "grad_norm": 0.32566148042678833, "learning_rate": 1.072919349186903e-05, "loss": 0.48630163073539734, "step": 11503 }, { "epoch": 14.115337423312884, "grad_norm": 0.2901492118835449, "learning_rate": 1.0725072714317442e-05, "loss": 0.6046281456947327, "step": 11504 }, { "epoch": 14.116564417177914, "grad_norm": 0.23853033781051636, "learning_rate": 1.0720952512147458e-05, "loss": 0.6185427308082581, "step": 11505 }, { "epoch": 14.117791411042944, "grad_norm": 0.27636513113975525, "learning_rate": 1.0716832885525158e-05, "loss": 0.44809994101524353, "step": 11506 }, { "epoch": 14.119018404907976, "grad_norm": 0.22410254180431366, "learning_rate": 1.0712713834616583e-05, "loss": 0.3005024790763855, "step": 11507 }, { "epoch": 14.120245398773006, "grad_norm": 0.24742592871189117, "learning_rate": 1.0708595359587776e-05, "loss": 0.7679787874221802, "step": 11508 }, { "epoch": 14.121472392638037, "grad_norm": 0.3000791668891907, "learning_rate": 1.0704477460604717e-05, "loss": 0.49525320529937744, "step": 11509 }, { "epoch": 14.122699386503067, "grad_norm": 0.28646913170814514, "learning_rate": 1.0700360137833403e-05, "loss": 0.6527382135391235, "step": 11510 }, { "epoch": 14.123926380368099, "grad_norm": 0.2622603476047516, "learning_rate": 1.0696243391439787e-05, "loss": 0.6430947780609131, "step": 11511 }, { "epoch": 14.125153374233129, "grad_norm": 0.26162299513816833, "learning_rate": 1.0692127221589803e-05, "loss": 0.5579301118850708, "step": 11512 }, { "epoch": 14.12638036809816, "grad_norm": 0.26442331075668335, "learning_rate": 1.0688011628449365e-05, "loss": 0.6304373741149902, "step": 11513 }, { "epoch": 14.12760736196319, "grad_norm": 0.29395925998687744, "learning_rate": 1.068389661218436e-05, "loss": 0.6177218556404114, "step": 11514 }, { "epoch": 14.128834355828221, "grad_norm": 0.268648236989975, "learning_rate": 1.0679782172960651e-05, "loss": 0.6919901967048645, "step": 11515 }, { "epoch": 14.130061349693252, "grad_norm": 0.2634170353412628, "learning_rate": 1.0675668310944081e-05, "loss": 0.5100622773170471, "step": 11516 }, { "epoch": 14.131288343558282, "grad_norm": 0.2662968337535858, "learning_rate": 1.0671555026300464e-05, "loss": 0.6017209887504578, "step": 11517 }, { "epoch": 14.132515337423312, "grad_norm": 0.23421812057495117, "learning_rate": 1.0667442319195608e-05, "loss": 0.5085071325302124, "step": 11518 }, { "epoch": 14.133742331288344, "grad_norm": 0.24244922399520874, "learning_rate": 1.0663330189795268e-05, "loss": 0.6872467398643494, "step": 11519 }, { "epoch": 14.134969325153374, "grad_norm": 0.24704262614250183, "learning_rate": 1.0659218638265198e-05, "loss": 0.6739990711212158, "step": 11520 }, { "epoch": 14.136196319018405, "grad_norm": 0.29988953471183777, "learning_rate": 1.0655107664771122e-05, "loss": 0.49820154905319214, "step": 11521 }, { "epoch": 14.137423312883435, "grad_norm": 0.2945973575115204, "learning_rate": 1.0650997269478743e-05, "loss": 0.6502155065536499, "step": 11522 }, { "epoch": 14.138650306748467, "grad_norm": 0.273228257894516, "learning_rate": 1.0646887452553746e-05, "loss": 0.5300850868225098, "step": 11523 }, { "epoch": 14.139877300613497, "grad_norm": 0.2511427402496338, "learning_rate": 1.0642778214161775e-05, "loss": 0.5978682637214661, "step": 11524 }, { "epoch": 14.141104294478527, "grad_norm": 0.2584686875343323, "learning_rate": 1.0638669554468464e-05, "loss": 0.6418758630752563, "step": 11525 }, { "epoch": 14.142331288343557, "grad_norm": 0.26446497440338135, "learning_rate": 1.0634561473639423e-05, "loss": 0.6322451829910278, "step": 11526 }, { "epoch": 14.14355828220859, "grad_norm": 0.2819088101387024, "learning_rate": 1.063045397184024e-05, "loss": 0.7443162202835083, "step": 11527 }, { "epoch": 14.14478527607362, "grad_norm": 0.24854052066802979, "learning_rate": 1.0626347049236482e-05, "loss": 0.7464837431907654, "step": 11528 }, { "epoch": 14.14601226993865, "grad_norm": 0.2719731628894806, "learning_rate": 1.062224070599367e-05, "loss": 0.44919735193252563, "step": 11529 }, { "epoch": 14.14723926380368, "grad_norm": 0.29249873757362366, "learning_rate": 1.0618134942277327e-05, "loss": 0.5878087282180786, "step": 11530 }, { "epoch": 14.148466257668712, "grad_norm": 0.27543944120407104, "learning_rate": 1.0614029758252947e-05, "loss": 0.6820988655090332, "step": 11531 }, { "epoch": 14.149693251533742, "grad_norm": 0.261933833360672, "learning_rate": 1.0609925154085998e-05, "loss": 0.7692282199859619, "step": 11532 }, { "epoch": 14.150920245398773, "grad_norm": 0.2920111417770386, "learning_rate": 1.0605821129941934e-05, "loss": 0.7519958019256592, "step": 11533 }, { "epoch": 14.152147239263805, "grad_norm": 0.26948195695877075, "learning_rate": 1.0601717685986148e-05, "loss": 0.6250985264778137, "step": 11534 }, { "epoch": 14.153374233128835, "grad_norm": 0.23859083652496338, "learning_rate": 1.0597614822384067e-05, "loss": 0.6163429617881775, "step": 11535 }, { "epoch": 14.154601226993865, "grad_norm": 0.2902066707611084, "learning_rate": 1.0593512539301053e-05, "loss": 0.4772600531578064, "step": 11536 }, { "epoch": 14.155828220858895, "grad_norm": 0.27248525619506836, "learning_rate": 1.0589410836902472e-05, "loss": 0.7264845371246338, "step": 11537 }, { "epoch": 14.157055214723927, "grad_norm": 0.33212822675704956, "learning_rate": 1.0585309715353629e-05, "loss": 0.5709583163261414, "step": 11538 }, { "epoch": 14.158282208588957, "grad_norm": 0.2598377764225006, "learning_rate": 1.0581209174819839e-05, "loss": 0.43443527817726135, "step": 11539 }, { "epoch": 14.159509202453988, "grad_norm": 0.2561003565788269, "learning_rate": 1.0577109215466385e-05, "loss": 0.6661140322685242, "step": 11540 }, { "epoch": 14.160736196319018, "grad_norm": 0.28433462977409363, "learning_rate": 1.0573009837458522e-05, "loss": 0.7017384767532349, "step": 11541 }, { "epoch": 14.16196319018405, "grad_norm": 0.28933534026145935, "learning_rate": 1.0568911040961493e-05, "loss": 0.7242432832717896, "step": 11542 }, { "epoch": 14.16319018404908, "grad_norm": 0.24753229320049286, "learning_rate": 1.0564812826140494e-05, "loss": 0.7897351384162903, "step": 11543 }, { "epoch": 14.16441717791411, "grad_norm": 0.2495090812444687, "learning_rate": 1.056071519316072e-05, "loss": 0.5520988702774048, "step": 11544 }, { "epoch": 14.16564417177914, "grad_norm": 0.26059648394584656, "learning_rate": 1.0556618142187336e-05, "loss": 0.5041667819023132, "step": 11545 }, { "epoch": 14.166871165644173, "grad_norm": 0.25296851992607117, "learning_rate": 1.0552521673385476e-05, "loss": 0.6365418434143066, "step": 11546 }, { "epoch": 14.168098159509203, "grad_norm": 0.48922526836395264, "learning_rate": 1.0548425786920275e-05, "loss": 0.5748574733734131, "step": 11547 }, { "epoch": 14.169325153374233, "grad_norm": 0.292863130569458, "learning_rate": 1.0544330482956801e-05, "loss": 0.6221733689308167, "step": 11548 }, { "epoch": 14.170552147239263, "grad_norm": 0.2593311369419098, "learning_rate": 1.054023576166014e-05, "loss": 0.6627856492996216, "step": 11549 }, { "epoch": 14.171779141104295, "grad_norm": 0.27165818214416504, "learning_rate": 1.0536141623195334e-05, "loss": 0.6785040497779846, "step": 11550 }, { "epoch": 14.173006134969325, "grad_norm": 0.2407885044813156, "learning_rate": 1.0532048067727404e-05, "loss": 0.5626015663146973, "step": 11551 }, { "epoch": 14.174233128834356, "grad_norm": 0.23753036558628082, "learning_rate": 1.0527955095421365e-05, "loss": 0.5027283430099487, "step": 11552 }, { "epoch": 14.175460122699386, "grad_norm": 0.24718518555164337, "learning_rate": 1.052386270644217e-05, "loss": 0.7547793984413147, "step": 11553 }, { "epoch": 14.176687116564418, "grad_norm": 0.24381586909294128, "learning_rate": 1.0519770900954784e-05, "loss": 0.7238664627075195, "step": 11554 }, { "epoch": 14.177914110429448, "grad_norm": 0.2687780261039734, "learning_rate": 1.051567967912413e-05, "loss": 0.611414909362793, "step": 11555 }, { "epoch": 14.179141104294478, "grad_norm": 0.2844390869140625, "learning_rate": 1.0511589041115123e-05, "loss": 0.5390043258666992, "step": 11556 }, { "epoch": 14.180368098159509, "grad_norm": 0.3045433461666107, "learning_rate": 1.0507498987092634e-05, "loss": 0.7269600033760071, "step": 11557 }, { "epoch": 14.18159509202454, "grad_norm": 0.24414445459842682, "learning_rate": 1.050340951722153e-05, "loss": 0.6822021007537842, "step": 11558 }, { "epoch": 14.18282208588957, "grad_norm": 0.24097026884555817, "learning_rate": 1.0499320631666642e-05, "loss": 0.5554944276809692, "step": 11559 }, { "epoch": 14.184049079754601, "grad_norm": 0.2709251344203949, "learning_rate": 1.0495232330592783e-05, "loss": 0.8742666840553284, "step": 11560 }, { "epoch": 14.185276073619631, "grad_norm": 0.24690572917461395, "learning_rate": 1.0491144614164748e-05, "loss": 0.32827943563461304, "step": 11561 }, { "epoch": 14.186503067484663, "grad_norm": 0.22189676761627197, "learning_rate": 1.0487057482547283e-05, "loss": 0.5533039569854736, "step": 11562 }, { "epoch": 14.187730061349694, "grad_norm": 0.3060120642185211, "learning_rate": 1.0482970935905139e-05, "loss": 0.605223536491394, "step": 11563 }, { "epoch": 14.188957055214724, "grad_norm": 0.2607632875442505, "learning_rate": 1.0478884974403037e-05, "loss": 0.7772994041442871, "step": 11564 }, { "epoch": 14.190184049079754, "grad_norm": 0.2832651734352112, "learning_rate": 1.0474799598205663e-05, "loss": 0.631005048751831, "step": 11565 }, { "epoch": 14.191411042944786, "grad_norm": 0.3094843029975891, "learning_rate": 1.0470714807477697e-05, "loss": 0.546908974647522, "step": 11566 }, { "epoch": 14.192638036809816, "grad_norm": 0.29571714997291565, "learning_rate": 1.046663060238377e-05, "loss": 0.7851329445838928, "step": 11567 }, { "epoch": 14.193865030674846, "grad_norm": 0.24397623538970947, "learning_rate": 1.0462546983088515e-05, "loss": 0.6630047559738159, "step": 11568 }, { "epoch": 14.195092024539877, "grad_norm": 0.19251631200313568, "learning_rate": 1.0458463949756528e-05, "loss": 0.31495752930641174, "step": 11569 }, { "epoch": 14.196319018404909, "grad_norm": 0.24183069169521332, "learning_rate": 1.0454381502552385e-05, "loss": 0.577174186706543, "step": 11570 }, { "epoch": 14.197546012269939, "grad_norm": 0.23863157629966736, "learning_rate": 1.0450299641640649e-05, "loss": 0.4037061333656311, "step": 11571 }, { "epoch": 14.198773006134969, "grad_norm": 0.32868996262550354, "learning_rate": 1.0446218367185826e-05, "loss": 0.6206008791923523, "step": 11572 }, { "epoch": 14.2, "grad_norm": 0.24038375914096832, "learning_rate": 1.0442137679352432e-05, "loss": 0.543275773525238, "step": 11573 }, { "epoch": 14.201226993865031, "grad_norm": 0.3429136276245117, "learning_rate": 1.043805757830495e-05, "loss": 0.4938139021396637, "step": 11574 }, { "epoch": 14.202453987730062, "grad_norm": 0.25281310081481934, "learning_rate": 1.0433978064207834e-05, "loss": 0.5589932203292847, "step": 11575 }, { "epoch": 14.203680981595092, "grad_norm": 0.2654656171798706, "learning_rate": 1.0429899137225526e-05, "loss": 0.6340378522872925, "step": 11576 }, { "epoch": 14.204907975460122, "grad_norm": 0.30154964327812195, "learning_rate": 1.042582079752242e-05, "loss": 0.5767189264297485, "step": 11577 }, { "epoch": 14.206134969325154, "grad_norm": 0.28971606492996216, "learning_rate": 1.0421743045262904e-05, "loss": 0.7637360095977783, "step": 11578 }, { "epoch": 14.207361963190184, "grad_norm": 0.25369518995285034, "learning_rate": 1.0417665880611355e-05, "loss": 0.6785928010940552, "step": 11579 }, { "epoch": 14.208588957055214, "grad_norm": 0.26056984066963196, "learning_rate": 1.0413589303732113e-05, "loss": 0.5249828100204468, "step": 11580 }, { "epoch": 14.209815950920245, "grad_norm": 0.2766437828540802, "learning_rate": 1.0409513314789476e-05, "loss": 0.47258424758911133, "step": 11581 }, { "epoch": 14.211042944785277, "grad_norm": 0.23596923053264618, "learning_rate": 1.0405437913947744e-05, "loss": 0.49163079261779785, "step": 11582 }, { "epoch": 14.212269938650307, "grad_norm": 0.24279949069023132, "learning_rate": 1.0401363101371186e-05, "loss": 0.677746057510376, "step": 11583 }, { "epoch": 14.213496932515337, "grad_norm": 0.28578317165374756, "learning_rate": 1.0397288877224042e-05, "loss": 0.4581337869167328, "step": 11584 }, { "epoch": 14.214723926380367, "grad_norm": 0.29982104897499084, "learning_rate": 1.0393215241670545e-05, "loss": 0.6203214526176453, "step": 11585 }, { "epoch": 14.2159509202454, "grad_norm": 0.30851849913597107, "learning_rate": 1.0389142194874873e-05, "loss": 0.6826757192611694, "step": 11586 }, { "epoch": 14.21717791411043, "grad_norm": 0.2538372576236725, "learning_rate": 1.0385069737001208e-05, "loss": 0.49730199575424194, "step": 11587 }, { "epoch": 14.21840490797546, "grad_norm": 0.26597216725349426, "learning_rate": 1.0380997868213698e-05, "loss": 0.69677734375, "step": 11588 }, { "epoch": 14.21963190184049, "grad_norm": 0.24172450602054596, "learning_rate": 1.0376926588676472e-05, "loss": 0.6189339756965637, "step": 11589 }, { "epoch": 14.220858895705522, "grad_norm": 0.2842448949813843, "learning_rate": 1.0372855898553635e-05, "loss": 0.6268017292022705, "step": 11590 }, { "epoch": 14.222085889570552, "grad_norm": 0.2684381604194641, "learning_rate": 1.036878579800925e-05, "loss": 0.6987808346748352, "step": 11591 }, { "epoch": 14.223312883435582, "grad_norm": 0.2847000062465668, "learning_rate": 1.036471628720738e-05, "loss": 0.6702919006347656, "step": 11592 }, { "epoch": 14.224539877300613, "grad_norm": 0.27225548028945923, "learning_rate": 1.0360647366312057e-05, "loss": 0.5204352140426636, "step": 11593 }, { "epoch": 14.225766871165645, "grad_norm": 0.27660465240478516, "learning_rate": 1.0356579035487285e-05, "loss": 0.7447108030319214, "step": 11594 }, { "epoch": 14.226993865030675, "grad_norm": 0.28030088543891907, "learning_rate": 1.0352511294897055e-05, "loss": 0.6764894723892212, "step": 11595 }, { "epoch": 14.228220858895705, "grad_norm": 0.26720941066741943, "learning_rate": 1.0348444144705312e-05, "loss": 0.5008120536804199, "step": 11596 }, { "epoch": 14.229447852760735, "grad_norm": 0.24618709087371826, "learning_rate": 1.0344377585075998e-05, "loss": 0.7246081829071045, "step": 11597 }, { "epoch": 14.230674846625767, "grad_norm": 0.3062438368797302, "learning_rate": 1.0340311616173023e-05, "loss": 0.5180500745773315, "step": 11598 }, { "epoch": 14.231901840490798, "grad_norm": 0.24299469590187073, "learning_rate": 1.0336246238160278e-05, "loss": 0.6006425619125366, "step": 11599 }, { "epoch": 14.233128834355828, "grad_norm": 0.30845773220062256, "learning_rate": 1.0332181451201622e-05, "loss": 0.6253533363342285, "step": 11600 }, { "epoch": 14.23435582822086, "grad_norm": 0.2842804193496704, "learning_rate": 1.0328117255460898e-05, "loss": 0.5894040465354919, "step": 11601 }, { "epoch": 14.23558282208589, "grad_norm": 0.25074324011802673, "learning_rate": 1.0324053651101926e-05, "loss": 0.5860624313354492, "step": 11602 }, { "epoch": 14.23680981595092, "grad_norm": 0.2756790518760681, "learning_rate": 1.031999063828849e-05, "loss": 0.687311053276062, "step": 11603 }, { "epoch": 14.23803680981595, "grad_norm": 0.28584912419319153, "learning_rate": 1.0315928217184373e-05, "loss": 0.721146821975708, "step": 11604 }, { "epoch": 14.239263803680982, "grad_norm": 0.3077791929244995, "learning_rate": 1.0311866387953301e-05, "loss": 0.6045595407485962, "step": 11605 }, { "epoch": 14.240490797546013, "grad_norm": 0.277397483587265, "learning_rate": 1.0307805150759e-05, "loss": 0.49730151891708374, "step": 11606 }, { "epoch": 14.241717791411043, "grad_norm": 0.24875666201114655, "learning_rate": 1.0303744505765173e-05, "loss": 0.6803578734397888, "step": 11607 }, { "epoch": 14.242944785276073, "grad_norm": 0.26108965277671814, "learning_rate": 1.0299684453135488e-05, "loss": 0.6103701591491699, "step": 11608 }, { "epoch": 14.244171779141105, "grad_norm": 0.2439052015542984, "learning_rate": 1.0295624993033607e-05, "loss": 0.48269546031951904, "step": 11609 }, { "epoch": 14.245398773006135, "grad_norm": 0.26308202743530273, "learning_rate": 1.0291566125623136e-05, "loss": 0.6822654008865356, "step": 11610 }, { "epoch": 14.246625766871166, "grad_norm": 0.31379976868629456, "learning_rate": 1.0287507851067682e-05, "loss": 0.4115161895751953, "step": 11611 }, { "epoch": 14.247852760736196, "grad_norm": 0.28767457604408264, "learning_rate": 1.0283450169530828e-05, "loss": 0.4456314444541931, "step": 11612 }, { "epoch": 14.249079754601228, "grad_norm": 0.2734333574771881, "learning_rate": 1.0279393081176122e-05, "loss": 0.6150249242782593, "step": 11613 }, { "epoch": 14.250306748466258, "grad_norm": 0.2928667962551117, "learning_rate": 1.0275336586167106e-05, "loss": 0.42845863103866577, "step": 11614 }, { "epoch": 14.251533742331288, "grad_norm": 0.2770393192768097, "learning_rate": 1.027128068466727e-05, "loss": 0.43842968344688416, "step": 11615 }, { "epoch": 14.252760736196318, "grad_norm": 0.2784719467163086, "learning_rate": 1.0267225376840099e-05, "loss": 0.6848158836364746, "step": 11616 }, { "epoch": 14.25398773006135, "grad_norm": 0.32958880066871643, "learning_rate": 1.0263170662849053e-05, "loss": 0.5544004440307617, "step": 11617 }, { "epoch": 14.25521472392638, "grad_norm": 0.3158535361289978, "learning_rate": 1.025911654285757e-05, "loss": 0.691526472568512, "step": 11618 }, { "epoch": 14.256441717791411, "grad_norm": 0.2829363942146301, "learning_rate": 1.0255063017029054e-05, "loss": 0.4727117121219635, "step": 11619 }, { "epoch": 14.257668711656441, "grad_norm": 0.39720258116722107, "learning_rate": 1.0251010085526905e-05, "loss": 0.5574281811714172, "step": 11620 }, { "epoch": 14.258895705521473, "grad_norm": 0.25001901388168335, "learning_rate": 1.0246957748514467e-05, "loss": 0.5325323343276978, "step": 11621 }, { "epoch": 14.260122699386503, "grad_norm": 0.2756943106651306, "learning_rate": 1.0242906006155078e-05, "loss": 0.8108733892440796, "step": 11622 }, { "epoch": 14.261349693251534, "grad_norm": 0.25394153594970703, "learning_rate": 1.0238854858612057e-05, "loss": 0.5331069827079773, "step": 11623 }, { "epoch": 14.262576687116564, "grad_norm": 0.2878991365432739, "learning_rate": 1.0234804306048711e-05, "loss": 0.6281964778900146, "step": 11624 }, { "epoch": 14.263803680981596, "grad_norm": 0.28492575883865356, "learning_rate": 1.0230754348628285e-05, "loss": 0.6100224256515503, "step": 11625 }, { "epoch": 14.265030674846626, "grad_norm": 0.28641027212142944, "learning_rate": 1.0226704986514026e-05, "loss": 0.7055392265319824, "step": 11626 }, { "epoch": 14.266257668711656, "grad_norm": 0.2844093143939972, "learning_rate": 1.0222656219869156e-05, "loss": 0.684749186038971, "step": 11627 }, { "epoch": 14.267484662576686, "grad_norm": 0.29259729385375977, "learning_rate": 1.0218608048856868e-05, "loss": 0.651931881904602, "step": 11628 }, { "epoch": 14.268711656441718, "grad_norm": 0.3063000738620758, "learning_rate": 1.0214560473640339e-05, "loss": 0.26592034101486206, "step": 11629 }, { "epoch": 14.269938650306749, "grad_norm": 0.2713453471660614, "learning_rate": 1.0210513494382698e-05, "loss": 0.5135734677314758, "step": 11630 }, { "epoch": 14.271165644171779, "grad_norm": 0.2724055349826813, "learning_rate": 1.0206467111247079e-05, "loss": 0.6346993446350098, "step": 11631 }, { "epoch": 14.27239263803681, "grad_norm": 0.26313379406929016, "learning_rate": 1.0202421324396576e-05, "loss": 0.6307381987571716, "step": 11632 }, { "epoch": 14.273619631901841, "grad_norm": 0.2881172299385071, "learning_rate": 1.019837613399427e-05, "loss": 0.6702436208724976, "step": 11633 }, { "epoch": 14.274846625766871, "grad_norm": 0.2605590224266052, "learning_rate": 1.0194331540203212e-05, "loss": 0.6015172600746155, "step": 11634 }, { "epoch": 14.276073619631902, "grad_norm": 0.23824821412563324, "learning_rate": 1.0190287543186414e-05, "loss": 0.6057284474372864, "step": 11635 }, { "epoch": 14.277300613496932, "grad_norm": 0.2690068185329437, "learning_rate": 1.0186244143106888e-05, "loss": 0.7393240332603455, "step": 11636 }, { "epoch": 14.278527607361964, "grad_norm": 0.2712327837944031, "learning_rate": 1.018220134012761e-05, "loss": 0.49680060148239136, "step": 11637 }, { "epoch": 14.279754601226994, "grad_norm": 0.28397083282470703, "learning_rate": 1.0178159134411533e-05, "loss": 0.6469228267669678, "step": 11638 }, { "epoch": 14.280981595092024, "grad_norm": 0.29525649547576904, "learning_rate": 1.0174117526121596e-05, "loss": 0.5859490633010864, "step": 11639 }, { "epoch": 14.282208588957054, "grad_norm": 0.30463072657585144, "learning_rate": 1.017007651542069e-05, "loss": 0.6803374290466309, "step": 11640 }, { "epoch": 14.283435582822086, "grad_norm": 0.26856759190559387, "learning_rate": 1.0166036102471702e-05, "loss": 0.5894753932952881, "step": 11641 }, { "epoch": 14.284662576687117, "grad_norm": 0.254022479057312, "learning_rate": 1.0161996287437493e-05, "loss": 0.6056731939315796, "step": 11642 }, { "epoch": 14.285889570552147, "grad_norm": 0.5049472451210022, "learning_rate": 1.0157957070480892e-05, "loss": 0.47855401039123535, "step": 11643 }, { "epoch": 14.287116564417177, "grad_norm": 0.2751588225364685, "learning_rate": 1.0153918451764721e-05, "loss": 0.6552620530128479, "step": 11644 }, { "epoch": 14.28834355828221, "grad_norm": 0.2456403374671936, "learning_rate": 1.0149880431451735e-05, "loss": 0.524717390537262, "step": 11645 }, { "epoch": 14.28957055214724, "grad_norm": 0.270365446805954, "learning_rate": 1.0145843009704725e-05, "loss": 0.7529228925704956, "step": 11646 }, { "epoch": 14.29079754601227, "grad_norm": 0.25798097252845764, "learning_rate": 1.014180618668642e-05, "loss": 0.6581964492797852, "step": 11647 }, { "epoch": 14.2920245398773, "grad_norm": 0.23920968174934387, "learning_rate": 1.0137769962559537e-05, "loss": 0.5507673621177673, "step": 11648 }, { "epoch": 14.293251533742332, "grad_norm": 0.2581292986869812, "learning_rate": 1.013373433748675e-05, "loss": 0.5261596441268921, "step": 11649 }, { "epoch": 14.294478527607362, "grad_norm": 0.26680776476860046, "learning_rate": 1.0129699311630732e-05, "loss": 0.436542272567749, "step": 11650 }, { "epoch": 14.295705521472392, "grad_norm": 0.27545222640037537, "learning_rate": 1.0125664885154126e-05, "loss": 0.7176574468612671, "step": 11651 }, { "epoch": 14.296932515337422, "grad_norm": 0.26542091369628906, "learning_rate": 1.012163105821954e-05, "loss": 0.5867053270339966, "step": 11652 }, { "epoch": 14.298159509202454, "grad_norm": 0.2580239474773407, "learning_rate": 1.0117597830989587e-05, "loss": 0.7347652912139893, "step": 11653 }, { "epoch": 14.299386503067485, "grad_norm": 0.34873494505882263, "learning_rate": 1.0113565203626807e-05, "loss": 0.5127145648002625, "step": 11654 }, { "epoch": 14.300613496932515, "grad_norm": 0.28391990065574646, "learning_rate": 1.0109533176293756e-05, "loss": 0.8707413673400879, "step": 11655 }, { "epoch": 14.301840490797545, "grad_norm": 0.28126752376556396, "learning_rate": 1.0105501749152953e-05, "loss": 0.682701587677002, "step": 11656 }, { "epoch": 14.303067484662577, "grad_norm": 0.29063624143600464, "learning_rate": 1.0101470922366897e-05, "loss": 0.6425907611846924, "step": 11657 }, { "epoch": 14.304294478527607, "grad_norm": 0.25505298376083374, "learning_rate": 1.0097440696098063e-05, "loss": 0.6159670352935791, "step": 11658 }, { "epoch": 14.305521472392638, "grad_norm": 0.29828503727912903, "learning_rate": 1.0093411070508882e-05, "loss": 0.7601655125617981, "step": 11659 }, { "epoch": 14.30674846625767, "grad_norm": 0.27873095870018005, "learning_rate": 1.0089382045761786e-05, "loss": 0.8385028839111328, "step": 11660 }, { "epoch": 14.3079754601227, "grad_norm": 0.22291332483291626, "learning_rate": 1.0085353622019175e-05, "loss": 0.5261977314949036, "step": 11661 }, { "epoch": 14.30920245398773, "grad_norm": 0.2566237449645996, "learning_rate": 1.0081325799443419e-05, "loss": 0.5706913471221924, "step": 11662 }, { "epoch": 14.31042944785276, "grad_norm": 0.26706206798553467, "learning_rate": 1.007729857819688e-05, "loss": 0.4944911003112793, "step": 11663 }, { "epoch": 14.31165644171779, "grad_norm": 0.22351869940757751, "learning_rate": 1.0073271958441869e-05, "loss": 0.3602396249771118, "step": 11664 }, { "epoch": 14.312883435582823, "grad_norm": 0.2759931683540344, "learning_rate": 1.006924594034069e-05, "loss": 0.3466549813747406, "step": 11665 }, { "epoch": 14.314110429447853, "grad_norm": 0.256693959236145, "learning_rate": 1.0065220524055625e-05, "loss": 0.5703699588775635, "step": 11666 }, { "epoch": 14.315337423312883, "grad_norm": 0.2792109251022339, "learning_rate": 1.0061195709748921e-05, "loss": 0.7187232971191406, "step": 11667 }, { "epoch": 14.316564417177915, "grad_norm": 0.25142931938171387, "learning_rate": 1.0057171497582815e-05, "loss": 0.3475554585456848, "step": 11668 }, { "epoch": 14.317791411042945, "grad_norm": 0.2571703791618347, "learning_rate": 1.0053147887719508e-05, "loss": 0.5036716461181641, "step": 11669 }, { "epoch": 14.319018404907975, "grad_norm": 0.2893578112125397, "learning_rate": 1.0049124880321178e-05, "loss": 0.7021853923797607, "step": 11670 }, { "epoch": 14.320245398773006, "grad_norm": 0.2549828886985779, "learning_rate": 1.0045102475549984e-05, "loss": 0.666445255279541, "step": 11671 }, { "epoch": 14.321472392638038, "grad_norm": 0.2675829827785492, "learning_rate": 1.0041080673568065e-05, "loss": 0.8096634149551392, "step": 11672 }, { "epoch": 14.322699386503068, "grad_norm": 0.282967209815979, "learning_rate": 1.003705947453751e-05, "loss": 0.5641428232192993, "step": 11673 }, { "epoch": 14.323926380368098, "grad_norm": 0.30629831552505493, "learning_rate": 1.0033038878620416e-05, "loss": 0.7178536653518677, "step": 11674 }, { "epoch": 14.325153374233128, "grad_norm": 0.2520371377468109, "learning_rate": 1.0029018885978834e-05, "loss": 0.7431670427322388, "step": 11675 }, { "epoch": 14.32638036809816, "grad_norm": 0.2952970564365387, "learning_rate": 1.00249994967748e-05, "loss": 0.5152825117111206, "step": 11676 }, { "epoch": 14.32760736196319, "grad_norm": 0.2841837406158447, "learning_rate": 1.0020980711170338e-05, "loss": 0.7814938426017761, "step": 11677 }, { "epoch": 14.32883435582822, "grad_norm": 0.25574642419815063, "learning_rate": 1.0016962529327415e-05, "loss": 0.6900051236152649, "step": 11678 }, { "epoch": 14.330061349693251, "grad_norm": 0.2535971403121948, "learning_rate": 1.0012944951407997e-05, "loss": 0.7038239240646362, "step": 11679 }, { "epoch": 14.331288343558283, "grad_norm": 0.2611050605773926, "learning_rate": 1.0008927977574023e-05, "loss": 0.6170046329498291, "step": 11680 }, { "epoch": 14.332515337423313, "grad_norm": 0.23157013952732086, "learning_rate": 1.0004911607987408e-05, "loss": 0.6901949644088745, "step": 11681 }, { "epoch": 14.333742331288343, "grad_norm": 0.2495499551296234, "learning_rate": 1.0000895842810048e-05, "loss": 0.7572228908538818, "step": 11682 }, { "epoch": 14.334969325153374, "grad_norm": 0.23729175329208374, "learning_rate": 9.996880682203788e-06, "loss": 0.4743660092353821, "step": 11683 }, { "epoch": 14.336196319018406, "grad_norm": 0.25938209891319275, "learning_rate": 9.992866126330477e-06, "loss": 0.6766465902328491, "step": 11684 }, { "epoch": 14.337423312883436, "grad_norm": 0.31233537197113037, "learning_rate": 9.988852175351931e-06, "loss": 0.4935191869735718, "step": 11685 }, { "epoch": 14.338650306748466, "grad_norm": 0.25912076234817505, "learning_rate": 9.984838829429941e-06, "loss": 0.7400537729263306, "step": 11686 }, { "epoch": 14.339877300613496, "grad_norm": 0.2643386125564575, "learning_rate": 9.980826088726283e-06, "loss": 0.6546769142150879, "step": 11687 }, { "epoch": 14.341104294478528, "grad_norm": 0.3196462392807007, "learning_rate": 9.976813953402677e-06, "loss": 0.6796534061431885, "step": 11688 }, { "epoch": 14.342331288343559, "grad_norm": 0.2851763069629669, "learning_rate": 9.972802423620848e-06, "loss": 0.5095332264900208, "step": 11689 }, { "epoch": 14.343558282208589, "grad_norm": 0.25660640001296997, "learning_rate": 9.9687914995425e-06, "loss": 0.7046597003936768, "step": 11690 }, { "epoch": 14.344785276073619, "grad_norm": 0.25251299142837524, "learning_rate": 9.964781181329311e-06, "loss": 0.7989528179168701, "step": 11691 }, { "epoch": 14.346012269938651, "grad_norm": 0.29277488589286804, "learning_rate": 9.960771469142896e-06, "loss": 0.6107009649276733, "step": 11692 }, { "epoch": 14.347239263803681, "grad_norm": 0.2663220763206482, "learning_rate": 9.956762363144892e-06, "loss": 0.6059300899505615, "step": 11693 }, { "epoch": 14.348466257668711, "grad_norm": 0.2786979675292969, "learning_rate": 9.952753863496895e-06, "loss": 0.6798167824745178, "step": 11694 }, { "epoch": 14.349693251533742, "grad_norm": 0.2164987176656723, "learning_rate": 9.948745970360468e-06, "loss": 0.309867262840271, "step": 11695 }, { "epoch": 14.350920245398774, "grad_norm": 0.2443779855966568, "learning_rate": 9.944738683897175e-06, "loss": 0.5534678101539612, "step": 11696 }, { "epoch": 14.352147239263804, "grad_norm": 0.24932734668254852, "learning_rate": 9.940732004268518e-06, "loss": 0.5150452852249146, "step": 11697 }, { "epoch": 14.353374233128834, "grad_norm": 0.36736950278282166, "learning_rate": 9.936725931636003e-06, "loss": 0.5673596858978271, "step": 11698 }, { "epoch": 14.354601226993864, "grad_norm": 0.2701435089111328, "learning_rate": 9.932720466161108e-06, "loss": 0.5269392728805542, "step": 11699 }, { "epoch": 14.355828220858896, "grad_norm": 0.40237486362457275, "learning_rate": 9.928715608005274e-06, "loss": 0.728110671043396, "step": 11700 }, { "epoch": 14.357055214723927, "grad_norm": 0.24996350705623627, "learning_rate": 9.924711357329938e-06, "loss": 0.36401626467704773, "step": 11701 }, { "epoch": 14.358282208588957, "grad_norm": 0.23692429065704346, "learning_rate": 9.920707714296481e-06, "loss": 0.6370718479156494, "step": 11702 }, { "epoch": 14.359509202453987, "grad_norm": 0.29575130343437195, "learning_rate": 9.91670467906629e-06, "loss": 0.6447596549987793, "step": 11703 }, { "epoch": 14.360736196319019, "grad_norm": 0.271743506193161, "learning_rate": 9.912702251800714e-06, "loss": 0.825739860534668, "step": 11704 }, { "epoch": 14.36196319018405, "grad_norm": 0.2533004879951477, "learning_rate": 9.908700432661083e-06, "loss": 0.6469017863273621, "step": 11705 }, { "epoch": 14.36319018404908, "grad_norm": 0.34989607334136963, "learning_rate": 9.904699221808702e-06, "loss": 0.5112596750259399, "step": 11706 }, { "epoch": 14.36441717791411, "grad_norm": 0.3080288767814636, "learning_rate": 9.900698619404833e-06, "loss": 0.5386574268341064, "step": 11707 }, { "epoch": 14.365644171779142, "grad_norm": 0.2797042429447174, "learning_rate": 9.89669862561074e-06, "loss": 0.6558998823165894, "step": 11708 }, { "epoch": 14.366871165644172, "grad_norm": 0.2798289358615875, "learning_rate": 9.892699240587647e-06, "loss": 0.6172792911529541, "step": 11709 }, { "epoch": 14.368098159509202, "grad_norm": 0.24727284908294678, "learning_rate": 9.888700464496762e-06, "loss": 0.5526530742645264, "step": 11710 }, { "epoch": 14.369325153374232, "grad_norm": 0.26891788840293884, "learning_rate": 9.884702297499271e-06, "loss": 0.5310153961181641, "step": 11711 }, { "epoch": 14.370552147239264, "grad_norm": 0.2551048994064331, "learning_rate": 9.880704739756302e-06, "loss": 0.46983373165130615, "step": 11712 }, { "epoch": 14.371779141104295, "grad_norm": 0.2517128884792328, "learning_rate": 9.876707791429013e-06, "loss": 0.484358549118042, "step": 11713 }, { "epoch": 14.373006134969325, "grad_norm": 0.2635323703289032, "learning_rate": 9.872711452678498e-06, "loss": 0.5377591848373413, "step": 11714 }, { "epoch": 14.374233128834355, "grad_norm": 0.3304626941680908, "learning_rate": 9.868715723665853e-06, "loss": 0.7083470821380615, "step": 11715 }, { "epoch": 14.375460122699387, "grad_norm": 0.26937738060951233, "learning_rate": 9.864720604552107e-06, "loss": 0.6341529488563538, "step": 11716 }, { "epoch": 14.376687116564417, "grad_norm": 0.2798765003681183, "learning_rate": 9.86072609549831e-06, "loss": 0.5999367237091064, "step": 11717 }, { "epoch": 14.377914110429447, "grad_norm": 0.2907653748989105, "learning_rate": 9.856732196665466e-06, "loss": 0.5404109954833984, "step": 11718 }, { "epoch": 14.379141104294478, "grad_norm": 0.32206419110298157, "learning_rate": 9.852738908214554e-06, "loss": 0.7299846410751343, "step": 11719 }, { "epoch": 14.38036809815951, "grad_norm": 0.30107581615448, "learning_rate": 9.848746230306546e-06, "loss": 0.7134767770767212, "step": 11720 }, { "epoch": 14.38159509202454, "grad_norm": 0.3191124498844147, "learning_rate": 9.844754163102357e-06, "loss": 0.5483623147010803, "step": 11721 }, { "epoch": 14.38282208588957, "grad_norm": 0.27367037534713745, "learning_rate": 9.840762706762902e-06, "loss": 0.6707201600074768, "step": 11722 }, { "epoch": 14.3840490797546, "grad_norm": 0.260766863822937, "learning_rate": 9.836771861449068e-06, "loss": 0.6705654859542847, "step": 11723 }, { "epoch": 14.385276073619632, "grad_norm": 0.2361653596162796, "learning_rate": 9.832781627321711e-06, "loss": 0.36612576246261597, "step": 11724 }, { "epoch": 14.386503067484663, "grad_norm": 0.2978599965572357, "learning_rate": 9.82879200454167e-06, "loss": 0.7604893445968628, "step": 11725 }, { "epoch": 14.387730061349693, "grad_norm": 0.26159146428108215, "learning_rate": 9.824802993269764e-06, "loss": 0.634406328201294, "step": 11726 }, { "epoch": 14.388957055214725, "grad_norm": 0.2704288363456726, "learning_rate": 9.820814593666757e-06, "loss": 0.2953636646270752, "step": 11727 }, { "epoch": 14.390184049079755, "grad_norm": 0.28129318356513977, "learning_rate": 9.816826805893423e-06, "loss": 0.39989757537841797, "step": 11728 }, { "epoch": 14.391411042944785, "grad_norm": 0.6254016757011414, "learning_rate": 9.812839630110497e-06, "loss": 0.6250792741775513, "step": 11729 }, { "epoch": 14.392638036809815, "grad_norm": 0.2530275881290436, "learning_rate": 9.808853066478688e-06, "loss": 0.5230201482772827, "step": 11730 }, { "epoch": 14.393865030674847, "grad_norm": 0.2960681915283203, "learning_rate": 9.804867115158694e-06, "loss": 0.7462234497070312, "step": 11731 }, { "epoch": 14.395092024539878, "grad_norm": 0.26249706745147705, "learning_rate": 9.800881776311164e-06, "loss": 0.6365996599197388, "step": 11732 }, { "epoch": 14.396319018404908, "grad_norm": 0.271910160779953, "learning_rate": 9.796897050096737e-06, "loss": 0.4156608283519745, "step": 11733 }, { "epoch": 14.397546012269938, "grad_norm": 0.2690506875514984, "learning_rate": 9.792912936676021e-06, "loss": 0.3724501430988312, "step": 11734 }, { "epoch": 14.39877300613497, "grad_norm": 0.2862379252910614, "learning_rate": 9.78892943620963e-06, "loss": 0.7513091564178467, "step": 11735 }, { "epoch": 14.4, "grad_norm": 0.28584080934524536, "learning_rate": 9.784946548858101e-06, "loss": 0.5104522109031677, "step": 11736 }, { "epoch": 14.40122699386503, "grad_norm": 0.26951155066490173, "learning_rate": 9.780964274781984e-06, "loss": 0.6711382865905762, "step": 11737 }, { "epoch": 14.40245398773006, "grad_norm": 0.27520206570625305, "learning_rate": 9.776982614141788e-06, "loss": 0.5449602007865906, "step": 11738 }, { "epoch": 14.403680981595093, "grad_norm": 0.2349061369895935, "learning_rate": 9.773001567098008e-06, "loss": 0.6121174097061157, "step": 11739 }, { "epoch": 14.404907975460123, "grad_norm": 0.28014063835144043, "learning_rate": 9.769021133811116e-06, "loss": 0.5804373621940613, "step": 11740 }, { "epoch": 14.406134969325153, "grad_norm": 0.24846085906028748, "learning_rate": 9.765041314441528e-06, "loss": 0.5172836780548096, "step": 11741 }, { "epoch": 14.407361963190183, "grad_norm": 0.2672063410282135, "learning_rate": 9.761062109149677e-06, "loss": 0.7479920387268066, "step": 11742 }, { "epoch": 14.408588957055215, "grad_norm": 0.31861579418182373, "learning_rate": 9.75708351809595e-06, "loss": 0.6217674016952515, "step": 11743 }, { "epoch": 14.409815950920246, "grad_norm": 0.2493344396352768, "learning_rate": 9.75310554144071e-06, "loss": 0.501952052116394, "step": 11744 }, { "epoch": 14.411042944785276, "grad_norm": 0.2760743200778961, "learning_rate": 9.749128179344311e-06, "loss": 0.6284095644950867, "step": 11745 }, { "epoch": 14.412269938650306, "grad_norm": 0.24546556174755096, "learning_rate": 9.745151431967047e-06, "loss": 0.7477366328239441, "step": 11746 }, { "epoch": 14.413496932515338, "grad_norm": 0.3159914016723633, "learning_rate": 9.741175299469224e-06, "loss": 0.5347251296043396, "step": 11747 }, { "epoch": 14.414723926380368, "grad_norm": 0.28996074199676514, "learning_rate": 9.737199782011103e-06, "loss": 0.7124683856964111, "step": 11748 }, { "epoch": 14.415950920245399, "grad_norm": 0.24146239459514618, "learning_rate": 9.733224879752928e-06, "loss": 0.568655252456665, "step": 11749 }, { "epoch": 14.417177914110429, "grad_norm": 0.25604668259620667, "learning_rate": 9.729250592854924e-06, "loss": 0.3848334550857544, "step": 11750 }, { "epoch": 14.41840490797546, "grad_norm": 0.2558605670928955, "learning_rate": 9.725276921477266e-06, "loss": 0.7340205907821655, "step": 11751 }, { "epoch": 14.419631901840491, "grad_norm": 0.25603339076042175, "learning_rate": 9.721303865780132e-06, "loss": 0.5546281337738037, "step": 11752 }, { "epoch": 14.420858895705521, "grad_norm": 0.28208649158477783, "learning_rate": 9.717331425923662e-06, "loss": 0.6044917702674866, "step": 11753 }, { "epoch": 14.422085889570551, "grad_norm": 0.31440049409866333, "learning_rate": 9.713359602067975e-06, "loss": 0.6141207814216614, "step": 11754 }, { "epoch": 14.423312883435583, "grad_norm": 0.28850749135017395, "learning_rate": 9.709388394373173e-06, "loss": 0.7723310589790344, "step": 11755 }, { "epoch": 14.424539877300614, "grad_norm": 0.2710738182067871, "learning_rate": 9.705417802999298e-06, "loss": 0.7105268239974976, "step": 11756 }, { "epoch": 14.425766871165644, "grad_norm": 0.25293153524398804, "learning_rate": 9.701447828106416e-06, "loss": 0.5280289649963379, "step": 11757 }, { "epoch": 14.426993865030674, "grad_norm": 0.26367974281311035, "learning_rate": 9.69747846985454e-06, "loss": 0.5904434323310852, "step": 11758 }, { "epoch": 14.428220858895706, "grad_norm": 0.32183486223220825, "learning_rate": 9.693509728403675e-06, "loss": 0.5957781672477722, "step": 11759 }, { "epoch": 14.429447852760736, "grad_norm": 0.24374188482761383, "learning_rate": 9.689541603913763e-06, "loss": 0.5423633456230164, "step": 11760 }, { "epoch": 14.430674846625767, "grad_norm": 0.28974616527557373, "learning_rate": 9.685574096544764e-06, "loss": 0.5238823890686035, "step": 11761 }, { "epoch": 14.431901840490797, "grad_norm": 0.303367555141449, "learning_rate": 9.681607206456596e-06, "loss": 0.7312635779380798, "step": 11762 }, { "epoch": 14.433128834355829, "grad_norm": 0.311206579208374, "learning_rate": 9.677640933809153e-06, "loss": 0.6968424916267395, "step": 11763 }, { "epoch": 14.434355828220859, "grad_norm": 0.3047153055667877, "learning_rate": 9.673675278762309e-06, "loss": 0.5855445861816406, "step": 11764 }, { "epoch": 14.43558282208589, "grad_norm": 0.25375598669052124, "learning_rate": 9.669710241475895e-06, "loss": 0.6343463659286499, "step": 11765 }, { "epoch": 14.43680981595092, "grad_norm": 0.29300156235694885, "learning_rate": 9.665745822109739e-06, "loss": 0.6673855781555176, "step": 11766 }, { "epoch": 14.438036809815952, "grad_norm": 0.24714310467243195, "learning_rate": 9.661782020823634e-06, "loss": 0.45219913125038147, "step": 11767 }, { "epoch": 14.439263803680982, "grad_norm": 0.25667181611061096, "learning_rate": 9.657818837777349e-06, "loss": 0.7295295000076294, "step": 11768 }, { "epoch": 14.440490797546012, "grad_norm": 0.2878057062625885, "learning_rate": 9.65385627313064e-06, "loss": 0.4786522388458252, "step": 11769 }, { "epoch": 14.441717791411042, "grad_norm": 0.23931974172592163, "learning_rate": 9.649894327043205e-06, "loss": 0.5329768061637878, "step": 11770 }, { "epoch": 14.442944785276074, "grad_norm": 0.2932173013687134, "learning_rate": 9.64593299967475e-06, "loss": 0.617416501045227, "step": 11771 }, { "epoch": 14.444171779141104, "grad_norm": 0.2638016939163208, "learning_rate": 9.641972291184947e-06, "loss": 0.1731855571269989, "step": 11772 }, { "epoch": 14.445398773006135, "grad_norm": 0.2512105107307434, "learning_rate": 9.638012201733435e-06, "loss": 0.607144832611084, "step": 11773 }, { "epoch": 14.446625766871165, "grad_norm": 0.280652791261673, "learning_rate": 9.634052731479847e-06, "loss": 0.6779596209526062, "step": 11774 }, { "epoch": 14.447852760736197, "grad_norm": 0.26529914140701294, "learning_rate": 9.630093880583763e-06, "loss": 0.6466817855834961, "step": 11775 }, { "epoch": 14.449079754601227, "grad_norm": 0.24446143209934235, "learning_rate": 9.626135649204753e-06, "loss": 0.6725552082061768, "step": 11776 }, { "epoch": 14.450306748466257, "grad_norm": 0.2400067299604416, "learning_rate": 9.622178037502373e-06, "loss": 0.5621616244316101, "step": 11777 }, { "epoch": 14.451533742331288, "grad_norm": 0.335723876953125, "learning_rate": 9.618221045636132e-06, "loss": 0.4233739972114563, "step": 11778 }, { "epoch": 14.45276073619632, "grad_norm": 0.2664281725883484, "learning_rate": 9.614264673765533e-06, "loss": 0.6468360424041748, "step": 11779 }, { "epoch": 14.45398773006135, "grad_norm": 0.26004430651664734, "learning_rate": 9.610308922050046e-06, "loss": 0.45595628023147583, "step": 11780 }, { "epoch": 14.45521472392638, "grad_norm": 0.2556781470775604, "learning_rate": 9.606353790649108e-06, "loss": 0.5415158271789551, "step": 11781 }, { "epoch": 14.45644171779141, "grad_norm": 0.24314844608306885, "learning_rate": 9.602399279722148e-06, "loss": 0.6119797229766846, "step": 11782 }, { "epoch": 14.457668711656442, "grad_norm": 0.26059019565582275, "learning_rate": 9.598445389428567e-06, "loss": 0.6628208756446838, "step": 11783 }, { "epoch": 14.458895705521472, "grad_norm": 0.24498052895069122, "learning_rate": 9.594492119927716e-06, "loss": 0.5941495895385742, "step": 11784 }, { "epoch": 14.460122699386503, "grad_norm": 0.26554417610168457, "learning_rate": 9.59053947137895e-06, "loss": 0.724120020866394, "step": 11785 }, { "epoch": 14.461349693251535, "grad_norm": 0.2995929419994354, "learning_rate": 9.586587443941589e-06, "loss": 0.4760098457336426, "step": 11786 }, { "epoch": 14.462576687116565, "grad_norm": 0.29420867562294006, "learning_rate": 9.582636037774927e-06, "loss": 0.5271052122116089, "step": 11787 }, { "epoch": 14.463803680981595, "grad_norm": 0.24538031220436096, "learning_rate": 9.578685253038244e-06, "loss": 0.688007116317749, "step": 11788 }, { "epoch": 14.465030674846625, "grad_norm": 0.2895897328853607, "learning_rate": 9.574735089890766e-06, "loss": 0.6689246892929077, "step": 11789 }, { "epoch": 14.466257668711656, "grad_norm": 0.30739694833755493, "learning_rate": 9.570785548491723e-06, "loss": 0.4230445623397827, "step": 11790 }, { "epoch": 14.467484662576688, "grad_norm": 0.27316969633102417, "learning_rate": 9.566836629000309e-06, "loss": 0.5462379455566406, "step": 11791 }, { "epoch": 14.468711656441718, "grad_norm": 0.2297501116991043, "learning_rate": 9.562888331575692e-06, "loss": 0.7207776308059692, "step": 11792 }, { "epoch": 14.469938650306748, "grad_norm": 0.23985572159290314, "learning_rate": 9.55894065637703e-06, "loss": 0.5604175329208374, "step": 11793 }, { "epoch": 14.47116564417178, "grad_norm": 0.29552018642425537, "learning_rate": 9.55499360356342e-06, "loss": 0.5365318059921265, "step": 11794 }, { "epoch": 14.47239263803681, "grad_norm": 0.3115067481994629, "learning_rate": 9.551047173293967e-06, "loss": 0.6077832579612732, "step": 11795 }, { "epoch": 14.47361963190184, "grad_norm": 0.23052147030830383, "learning_rate": 9.547101365727742e-06, "loss": 0.555755078792572, "step": 11796 }, { "epoch": 14.47484662576687, "grad_norm": 0.22295694053173065, "learning_rate": 9.543156181023789e-06, "loss": 0.6543583273887634, "step": 11797 }, { "epoch": 14.476073619631903, "grad_norm": 0.30599573254585266, "learning_rate": 9.539211619341131e-06, "loss": 0.5942333936691284, "step": 11798 }, { "epoch": 14.477300613496933, "grad_norm": 0.2867882549762726, "learning_rate": 9.535267680838752e-06, "loss": 0.48224663734436035, "step": 11799 }, { "epoch": 14.478527607361963, "grad_norm": 0.2575134038925171, "learning_rate": 9.531324365675618e-06, "loss": 0.6957072615623474, "step": 11800 }, { "epoch": 14.479754601226993, "grad_norm": 0.2803838551044464, "learning_rate": 9.527381674010685e-06, "loss": 0.5908041000366211, "step": 11801 }, { "epoch": 14.480981595092025, "grad_norm": 0.2754668593406677, "learning_rate": 9.523439606002879e-06, "loss": 0.7138383984565735, "step": 11802 }, { "epoch": 14.482208588957056, "grad_norm": 0.27329614758491516, "learning_rate": 9.519498161811074e-06, "loss": 0.681136965751648, "step": 11803 }, { "epoch": 14.483435582822086, "grad_norm": 0.2715151011943817, "learning_rate": 9.515557341594144e-06, "loss": 0.48497724533081055, "step": 11804 }, { "epoch": 14.484662576687116, "grad_norm": 0.29832732677459717, "learning_rate": 9.511617145510937e-06, "loss": 0.8235884308815002, "step": 11805 }, { "epoch": 14.485889570552148, "grad_norm": 0.24883785843849182, "learning_rate": 9.507677573720266e-06, "loss": 0.6670123338699341, "step": 11806 }, { "epoch": 14.487116564417178, "grad_norm": 0.26958730816841125, "learning_rate": 9.503738626380936e-06, "loss": 0.550085723400116, "step": 11807 }, { "epoch": 14.488343558282208, "grad_norm": 0.2588229179382324, "learning_rate": 9.499800303651698e-06, "loss": 0.5846948623657227, "step": 11808 }, { "epoch": 14.489570552147239, "grad_norm": 0.25563815236091614, "learning_rate": 9.4958626056913e-06, "loss": 0.48474419116973877, "step": 11809 }, { "epoch": 14.49079754601227, "grad_norm": 0.2772146463394165, "learning_rate": 9.491925532658463e-06, "loss": 0.699684739112854, "step": 11810 }, { "epoch": 14.4920245398773, "grad_norm": 0.2862565219402313, "learning_rate": 9.487989084711876e-06, "loss": 0.6714613437652588, "step": 11811 }, { "epoch": 14.493251533742331, "grad_norm": 0.2607727646827698, "learning_rate": 9.484053262010217e-06, "loss": 0.610944390296936, "step": 11812 }, { "epoch": 14.494478527607361, "grad_norm": 0.2344786822795868, "learning_rate": 9.48011806471211e-06, "loss": 0.6854819059371948, "step": 11813 }, { "epoch": 14.495705521472393, "grad_norm": 0.32441166043281555, "learning_rate": 9.476183492976177e-06, "loss": 0.6639204025268555, "step": 11814 }, { "epoch": 14.496932515337424, "grad_norm": 0.2612432837486267, "learning_rate": 9.472249546961015e-06, "loss": 0.6335872411727905, "step": 11815 }, { "epoch": 14.498159509202454, "grad_norm": 0.2645092010498047, "learning_rate": 9.468316226825185e-06, "loss": 0.6316390037536621, "step": 11816 }, { "epoch": 14.499386503067484, "grad_norm": 0.21764536201953888, "learning_rate": 9.464383532727239e-06, "loss": 0.5116614699363708, "step": 11817 }, { "epoch": 14.500613496932516, "grad_norm": 0.31945866346359253, "learning_rate": 9.460451464825675e-06, "loss": 0.6084119081497192, "step": 11818 }, { "epoch": 14.501840490797546, "grad_norm": 0.25932446122169495, "learning_rate": 9.456520023278994e-06, "loss": 0.6685522794723511, "step": 11819 }, { "epoch": 14.503067484662576, "grad_norm": 0.23991116881370544, "learning_rate": 9.45258920824566e-06, "loss": 0.584761381149292, "step": 11820 }, { "epoch": 14.504294478527607, "grad_norm": 0.22442063689231873, "learning_rate": 9.44865901988411e-06, "loss": 0.4896984398365021, "step": 11821 }, { "epoch": 14.505521472392639, "grad_norm": 0.24551789462566376, "learning_rate": 9.444729458352764e-06, "loss": 0.4849543273448944, "step": 11822 }, { "epoch": 14.506748466257669, "grad_norm": 0.28697919845581055, "learning_rate": 9.440800523810006e-06, "loss": 0.47968995571136475, "step": 11823 }, { "epoch": 14.5079754601227, "grad_norm": 0.30877307057380676, "learning_rate": 9.436872216414202e-06, "loss": 0.7123206257820129, "step": 11824 }, { "epoch": 14.50920245398773, "grad_norm": 0.3319644629955292, "learning_rate": 9.432944536323692e-06, "loss": 0.3925880789756775, "step": 11825 }, { "epoch": 14.510429447852761, "grad_norm": 0.2585448920726776, "learning_rate": 9.429017483696793e-06, "loss": 0.5866252183914185, "step": 11826 }, { "epoch": 14.511656441717792, "grad_norm": 0.27920645475387573, "learning_rate": 9.425091058691793e-06, "loss": 0.5513052940368652, "step": 11827 }, { "epoch": 14.512883435582822, "grad_norm": 0.25455132126808167, "learning_rate": 9.421165261466946e-06, "loss": 0.5429762601852417, "step": 11828 }, { "epoch": 14.514110429447852, "grad_norm": 0.25905516743659973, "learning_rate": 9.417240092180493e-06, "loss": 0.7493897080421448, "step": 11829 }, { "epoch": 14.515337423312884, "grad_norm": 0.2581912875175476, "learning_rate": 9.41331555099065e-06, "loss": 0.6141669750213623, "step": 11830 }, { "epoch": 14.516564417177914, "grad_norm": 0.310443252325058, "learning_rate": 9.409391638055601e-06, "loss": 0.7819204926490784, "step": 11831 }, { "epoch": 14.517791411042944, "grad_norm": 0.28934600949287415, "learning_rate": 9.40546835353352e-06, "loss": 0.5672523379325867, "step": 11832 }, { "epoch": 14.519018404907975, "grad_norm": 0.24418307840824127, "learning_rate": 9.401545697582523e-06, "loss": 0.6756708025932312, "step": 11833 }, { "epoch": 14.520245398773007, "grad_norm": 0.32192447781562805, "learning_rate": 9.39762367036073e-06, "loss": 0.8510699272155762, "step": 11834 }, { "epoch": 14.521472392638037, "grad_norm": 0.28894829750061035, "learning_rate": 9.393702272026229e-06, "loss": 0.2821674942970276, "step": 11835 }, { "epoch": 14.522699386503067, "grad_norm": 0.22520694136619568, "learning_rate": 9.389781502737077e-06, "loss": 0.7007315158843994, "step": 11836 }, { "epoch": 14.523926380368097, "grad_norm": 0.2917502224445343, "learning_rate": 9.385861362651321e-06, "loss": 0.4028226137161255, "step": 11837 }, { "epoch": 14.52515337423313, "grad_norm": 0.26055800914764404, "learning_rate": 9.38194185192695e-06, "loss": 0.8220758438110352, "step": 11838 }, { "epoch": 14.52638036809816, "grad_norm": 0.2589694857597351, "learning_rate": 9.37802297072196e-06, "loss": 0.6290596723556519, "step": 11839 }, { "epoch": 14.52760736196319, "grad_norm": 0.3005844056606293, "learning_rate": 9.37410471919431e-06, "loss": 0.7614719271659851, "step": 11840 }, { "epoch": 14.52883435582822, "grad_norm": 0.29179346561431885, "learning_rate": 9.370187097501932e-06, "loss": 0.6615824699401855, "step": 11841 }, { "epoch": 14.530061349693252, "grad_norm": 0.3873053193092346, "learning_rate": 9.366270105802741e-06, "loss": 0.6874691247940063, "step": 11842 }, { "epoch": 14.531288343558282, "grad_norm": 0.3242568075656891, "learning_rate": 9.362353744254607e-06, "loss": 0.49378883838653564, "step": 11843 }, { "epoch": 14.532515337423312, "grad_norm": 0.3470313251018524, "learning_rate": 9.358438013015392e-06, "loss": 0.4733317494392395, "step": 11844 }, { "epoch": 14.533742331288344, "grad_norm": 1.3925937414169312, "learning_rate": 9.354522912242922e-06, "loss": 0.5280675888061523, "step": 11845 }, { "epoch": 14.534969325153375, "grad_norm": 0.22518852353096008, "learning_rate": 9.350608442095027e-06, "loss": 0.6288564801216125, "step": 11846 }, { "epoch": 14.536196319018405, "grad_norm": 0.25009021162986755, "learning_rate": 9.346694602729463e-06, "loss": 0.5636303424835205, "step": 11847 }, { "epoch": 14.537423312883435, "grad_norm": 0.25868263840675354, "learning_rate": 9.342781394303999e-06, "loss": 0.6585736274719238, "step": 11848 }, { "epoch": 14.538650306748465, "grad_norm": 0.2959892153739929, "learning_rate": 9.338868816976356e-06, "loss": 0.6465610265731812, "step": 11849 }, { "epoch": 14.539877300613497, "grad_norm": 0.27539587020874023, "learning_rate": 9.33495687090425e-06, "loss": 0.41643479466438293, "step": 11850 }, { "epoch": 14.541104294478528, "grad_norm": 0.29271215200424194, "learning_rate": 9.331045556245358e-06, "loss": 0.7356770038604736, "step": 11851 }, { "epoch": 14.542331288343558, "grad_norm": 0.24383002519607544, "learning_rate": 9.327134873157323e-06, "loss": 0.48030996322631836, "step": 11852 }, { "epoch": 14.54355828220859, "grad_norm": 0.3949355185031891, "learning_rate": 9.323224821797782e-06, "loss": 0.6160870790481567, "step": 11853 }, { "epoch": 14.54478527607362, "grad_norm": 0.279994934797287, "learning_rate": 9.319315402324336e-06, "loss": 0.7567717432975769, "step": 11854 }, { "epoch": 14.54601226993865, "grad_norm": 0.2455710619688034, "learning_rate": 9.315406614894564e-06, "loss": 0.6346160173416138, "step": 11855 }, { "epoch": 14.54723926380368, "grad_norm": 0.2504456639289856, "learning_rate": 9.311498459666027e-06, "loss": 0.5190522074699402, "step": 11856 }, { "epoch": 14.548466257668712, "grad_norm": 0.2789161205291748, "learning_rate": 9.307590936796232e-06, "loss": 0.7197632789611816, "step": 11857 }, { "epoch": 14.549693251533743, "grad_norm": 0.269919216632843, "learning_rate": 9.30368404644269e-06, "loss": 0.6156551837921143, "step": 11858 }, { "epoch": 14.550920245398773, "grad_norm": 0.2768617272377014, "learning_rate": 9.299777788762875e-06, "loss": 0.6604628562927246, "step": 11859 }, { "epoch": 14.552147239263803, "grad_norm": 0.2613345980644226, "learning_rate": 9.295872163914243e-06, "loss": 0.8093846440315247, "step": 11860 }, { "epoch": 14.553374233128835, "grad_norm": 0.24762558937072754, "learning_rate": 9.291967172054223e-06, "loss": 0.7762954831123352, "step": 11861 }, { "epoch": 14.554601226993865, "grad_norm": 0.259147584438324, "learning_rate": 9.288062813340192e-06, "loss": 0.567660927772522, "step": 11862 }, { "epoch": 14.555828220858896, "grad_norm": 0.3049447536468506, "learning_rate": 9.284159087929543e-06, "loss": 0.2681196928024292, "step": 11863 }, { "epoch": 14.557055214723926, "grad_norm": 0.3033803105354309, "learning_rate": 9.280255995979614e-06, "loss": 0.7339498996734619, "step": 11864 }, { "epoch": 14.558282208588958, "grad_norm": 0.26877301931381226, "learning_rate": 9.276353537647734e-06, "loss": 0.6481333374977112, "step": 11865 }, { "epoch": 14.559509202453988, "grad_norm": 0.33092302083969116, "learning_rate": 9.272451713091205e-06, "loss": 0.7120564579963684, "step": 11866 }, { "epoch": 14.560736196319018, "grad_norm": 0.2926606833934784, "learning_rate": 9.268550522467275e-06, "loss": 0.5712480545043945, "step": 11867 }, { "epoch": 14.561963190184048, "grad_norm": 0.2834417521953583, "learning_rate": 9.264649965933214e-06, "loss": 0.36219894886016846, "step": 11868 }, { "epoch": 14.56319018404908, "grad_norm": 0.2609507441520691, "learning_rate": 9.260750043646233e-06, "loss": 0.26492148637771606, "step": 11869 }, { "epoch": 14.56441717791411, "grad_norm": 0.292341411113739, "learning_rate": 9.256850755763539e-06, "loss": 0.5310416221618652, "step": 11870 }, { "epoch": 14.565644171779141, "grad_norm": 0.2840244472026825, "learning_rate": 9.25295210244228e-06, "loss": 0.7075860500335693, "step": 11871 }, { "epoch": 14.566871165644171, "grad_norm": 0.26308169960975647, "learning_rate": 9.24905408383961e-06, "loss": 0.3968117833137512, "step": 11872 }, { "epoch": 14.568098159509203, "grad_norm": 0.3022506833076477, "learning_rate": 9.245156700112647e-06, "loss": 0.6923583149909973, "step": 11873 }, { "epoch": 14.569325153374233, "grad_norm": 0.22791756689548492, "learning_rate": 9.241259951418483e-06, "loss": 0.5113426446914673, "step": 11874 }, { "epoch": 14.570552147239264, "grad_norm": 0.2767876088619232, "learning_rate": 9.237363837914193e-06, "loss": 0.6447065472602844, "step": 11875 }, { "epoch": 14.571779141104294, "grad_norm": 0.27316170930862427, "learning_rate": 9.233468359756803e-06, "loss": 0.8161126375198364, "step": 11876 }, { "epoch": 14.573006134969326, "grad_norm": 0.2763059437274933, "learning_rate": 9.229573517103335e-06, "loss": 0.5390084981918335, "step": 11877 }, { "epoch": 14.574233128834356, "grad_norm": 0.250725120306015, "learning_rate": 9.22567931011078e-06, "loss": 0.47397691011428833, "step": 11878 }, { "epoch": 14.575460122699386, "grad_norm": 0.26256251335144043, "learning_rate": 9.221785738936103e-06, "loss": 0.5547425150871277, "step": 11879 }, { "epoch": 14.576687116564417, "grad_norm": 0.2575515806674957, "learning_rate": 9.21789280373625e-06, "loss": 0.5927395820617676, "step": 11880 }, { "epoch": 14.577914110429449, "grad_norm": 0.2739813029766083, "learning_rate": 9.214000504668115e-06, "loss": 0.7648072838783264, "step": 11881 }, { "epoch": 14.579141104294479, "grad_norm": 0.24187859892845154, "learning_rate": 9.210108841888601e-06, "loss": 0.5922518968582153, "step": 11882 }, { "epoch": 14.580368098159509, "grad_norm": 0.27260032296180725, "learning_rate": 9.206217815554563e-06, "loss": 0.3909223973751068, "step": 11883 }, { "epoch": 14.58159509202454, "grad_norm": 0.2319629043340683, "learning_rate": 9.20232742582284e-06, "loss": 0.6244817972183228, "step": 11884 }, { "epoch": 14.582822085889571, "grad_norm": 0.2779448628425598, "learning_rate": 9.198437672850248e-06, "loss": 0.702998161315918, "step": 11885 }, { "epoch": 14.584049079754601, "grad_norm": 0.2786218822002411, "learning_rate": 9.194548556793562e-06, "loss": 0.6377198696136475, "step": 11886 }, { "epoch": 14.585276073619632, "grad_norm": 0.23765002191066742, "learning_rate": 9.190660077809543e-06, "loss": 0.6303846836090088, "step": 11887 }, { "epoch": 14.586503067484662, "grad_norm": 0.2839490473270416, "learning_rate": 9.186772236054928e-06, "loss": 0.6467804312705994, "step": 11888 }, { "epoch": 14.587730061349694, "grad_norm": 0.3578345775604248, "learning_rate": 9.182885031686422e-06, "loss": 0.6737513542175293, "step": 11889 }, { "epoch": 14.588957055214724, "grad_norm": 0.2792655825614929, "learning_rate": 9.178998464860708e-06, "loss": 0.830718457698822, "step": 11890 }, { "epoch": 14.590184049079754, "grad_norm": 0.27722105383872986, "learning_rate": 9.175112535734442e-06, "loss": 0.6091527938842773, "step": 11891 }, { "epoch": 14.591411042944785, "grad_norm": 0.6696838736534119, "learning_rate": 9.17122724446426e-06, "loss": 0.5638253688812256, "step": 11892 }, { "epoch": 14.592638036809817, "grad_norm": 0.30759432911872864, "learning_rate": 9.16734259120676e-06, "loss": 0.3923593759536743, "step": 11893 }, { "epoch": 14.593865030674847, "grad_norm": 0.2987576127052307, "learning_rate": 9.16345857611853e-06, "loss": 0.4288387596607208, "step": 11894 }, { "epoch": 14.595092024539877, "grad_norm": 0.25347134470939636, "learning_rate": 9.159575199356114e-06, "loss": 0.5978169441223145, "step": 11895 }, { "epoch": 14.596319018404907, "grad_norm": 0.2557547092437744, "learning_rate": 9.15569246107604e-06, "loss": 0.40108442306518555, "step": 11896 }, { "epoch": 14.59754601226994, "grad_norm": 0.27947017550468445, "learning_rate": 9.151810361434815e-06, "loss": 0.6008612513542175, "step": 11897 }, { "epoch": 14.59877300613497, "grad_norm": 0.28160104155540466, "learning_rate": 9.147928900588912e-06, "loss": 0.5063933730125427, "step": 11898 }, { "epoch": 14.6, "grad_norm": 0.25211530923843384, "learning_rate": 9.144048078694794e-06, "loss": 0.6265649795532227, "step": 11899 }, { "epoch": 14.60122699386503, "grad_norm": 0.22497881948947906, "learning_rate": 9.140167895908867e-06, "loss": 0.3758583664894104, "step": 11900 }, { "epoch": 14.602453987730062, "grad_norm": 0.2842586040496826, "learning_rate": 9.136288352387537e-06, "loss": 0.664172887802124, "step": 11901 }, { "epoch": 14.603680981595092, "grad_norm": 0.30307918787002563, "learning_rate": 9.13240944828718e-06, "loss": 0.6586946249008179, "step": 11902 }, { "epoch": 14.604907975460122, "grad_norm": 0.2378753274679184, "learning_rate": 9.12853118376414e-06, "loss": 0.34759074449539185, "step": 11903 }, { "epoch": 14.606134969325154, "grad_norm": 0.2981560230255127, "learning_rate": 9.124653558974752e-06, "loss": 0.7701011300086975, "step": 11904 }, { "epoch": 14.607361963190185, "grad_norm": 0.29036590456962585, "learning_rate": 9.120776574075293e-06, "loss": 0.582547664642334, "step": 11905 }, { "epoch": 14.608588957055215, "grad_norm": 0.25818774104118347, "learning_rate": 9.116900229222041e-06, "loss": 0.6556161046028137, "step": 11906 }, { "epoch": 14.609815950920245, "grad_norm": 0.2588243782520294, "learning_rate": 9.11302452457124e-06, "loss": 0.7815153002738953, "step": 11907 }, { "epoch": 14.611042944785275, "grad_norm": 0.2555122971534729, "learning_rate": 9.10914946027911e-06, "loss": 0.6961438655853271, "step": 11908 }, { "epoch": 14.612269938650307, "grad_norm": 0.2537943124771118, "learning_rate": 9.10527503650185e-06, "loss": 0.5191755890846252, "step": 11909 }, { "epoch": 14.613496932515337, "grad_norm": 0.2743007242679596, "learning_rate": 9.101401253395614e-06, "loss": 0.6015865802764893, "step": 11910 }, { "epoch": 14.614723926380368, "grad_norm": 0.2838176190853119, "learning_rate": 9.097528111116549e-06, "loss": 0.6093318462371826, "step": 11911 }, { "epoch": 14.6159509202454, "grad_norm": 0.24066707491874695, "learning_rate": 9.093655609820761e-06, "loss": 0.4910968244075775, "step": 11912 }, { "epoch": 14.61717791411043, "grad_norm": 0.3018009662628174, "learning_rate": 9.089783749664366e-06, "loss": 0.4435844421386719, "step": 11913 }, { "epoch": 14.61840490797546, "grad_norm": 0.30884307622909546, "learning_rate": 9.085912530803403e-06, "loss": 0.5861457586288452, "step": 11914 }, { "epoch": 14.61963190184049, "grad_norm": 0.27418383955955505, "learning_rate": 9.082041953393917e-06, "loss": 0.6529459357261658, "step": 11915 }, { "epoch": 14.62085889570552, "grad_norm": 0.25775063037872314, "learning_rate": 9.078172017591918e-06, "loss": 0.5753363966941833, "step": 11916 }, { "epoch": 14.622085889570553, "grad_norm": 0.30084216594696045, "learning_rate": 9.074302723553398e-06, "loss": 0.5934033393859863, "step": 11917 }, { "epoch": 14.623312883435583, "grad_norm": 0.26938897371292114, "learning_rate": 9.070434071434319e-06, "loss": 0.8017957210540771, "step": 11918 }, { "epoch": 14.624539877300613, "grad_norm": 0.2740583121776581, "learning_rate": 9.066566061390604e-06, "loss": 0.5408639907836914, "step": 11919 }, { "epoch": 14.625766871165645, "grad_norm": 0.2574388086795807, "learning_rate": 9.062698693578167e-06, "loss": 0.587605357170105, "step": 11920 }, { "epoch": 14.626993865030675, "grad_norm": 0.3982428014278412, "learning_rate": 9.058831968152892e-06, "loss": 0.8865779638290405, "step": 11921 }, { "epoch": 14.628220858895705, "grad_norm": 0.2944190204143524, "learning_rate": 9.054965885270633e-06, "loss": 0.4865642189979553, "step": 11922 }, { "epoch": 14.629447852760736, "grad_norm": 0.35763052105903625, "learning_rate": 9.051100445087232e-06, "loss": 0.6811774373054504, "step": 11923 }, { "epoch": 14.630674846625768, "grad_norm": 0.25838157534599304, "learning_rate": 9.047235647758476e-06, "loss": 0.549130916595459, "step": 11924 }, { "epoch": 14.631901840490798, "grad_norm": 0.2575950026512146, "learning_rate": 9.043371493440153e-06, "loss": 0.5579609274864197, "step": 11925 }, { "epoch": 14.633128834355828, "grad_norm": 0.23452046513557434, "learning_rate": 9.039507982288015e-06, "loss": 0.5367724299430847, "step": 11926 }, { "epoch": 14.634355828220858, "grad_norm": 0.30837878584861755, "learning_rate": 9.035645114457789e-06, "loss": 0.3809031844139099, "step": 11927 }, { "epoch": 14.63558282208589, "grad_norm": 0.2910309135913849, "learning_rate": 9.031782890105175e-06, "loss": 0.6536710262298584, "step": 11928 }, { "epoch": 14.63680981595092, "grad_norm": 0.27619433403015137, "learning_rate": 9.027921309385861e-06, "loss": 0.29942017793655396, "step": 11929 }, { "epoch": 14.63803680981595, "grad_norm": 0.3026379942893982, "learning_rate": 9.024060372455475e-06, "loss": 0.6399871110916138, "step": 11930 }, { "epoch": 14.639263803680981, "grad_norm": 0.2668939530849457, "learning_rate": 9.020200079469651e-06, "loss": 0.6128946542739868, "step": 11931 }, { "epoch": 14.640490797546013, "grad_norm": 0.24826763570308685, "learning_rate": 9.016340430583986e-06, "loss": 0.7149607539176941, "step": 11932 }, { "epoch": 14.641717791411043, "grad_norm": 0.2506355047225952, "learning_rate": 9.012481425954053e-06, "loss": 0.6694777011871338, "step": 11933 }, { "epoch": 14.642944785276073, "grad_norm": 0.2900720536708832, "learning_rate": 9.008623065735395e-06, "loss": 0.7017308473587036, "step": 11934 }, { "epoch": 14.644171779141104, "grad_norm": 0.23249562084674835, "learning_rate": 9.00476535008353e-06, "loss": 0.4901745915412903, "step": 11935 }, { "epoch": 14.645398773006136, "grad_norm": 0.2890225648880005, "learning_rate": 9.000908279153957e-06, "loss": 0.5091648697853088, "step": 11936 }, { "epoch": 14.646625766871166, "grad_norm": 0.2424631267786026, "learning_rate": 8.99705185310214e-06, "loss": 0.40398645401000977, "step": 11937 }, { "epoch": 14.647852760736196, "grad_norm": 0.2862500846385956, "learning_rate": 8.993196072083528e-06, "loss": 0.6912834644317627, "step": 11938 }, { "epoch": 14.649079754601226, "grad_norm": 0.3339638411998749, "learning_rate": 8.989340936253521e-06, "loss": 0.5201306343078613, "step": 11939 }, { "epoch": 14.650306748466258, "grad_norm": 0.2652077376842499, "learning_rate": 8.985486445767518e-06, "loss": 0.628823459148407, "step": 11940 }, { "epoch": 14.651533742331289, "grad_norm": 0.2496885359287262, "learning_rate": 8.98163260078088e-06, "loss": 0.4284096658229828, "step": 11941 }, { "epoch": 14.652760736196319, "grad_norm": 0.2648942768573761, "learning_rate": 8.97777940144895e-06, "loss": 0.6904215216636658, "step": 11942 }, { "epoch": 14.653987730061349, "grad_norm": 0.2672322392463684, "learning_rate": 8.973926847927041e-06, "loss": 0.7137519121170044, "step": 11943 }, { "epoch": 14.655214723926381, "grad_norm": 0.3625229597091675, "learning_rate": 8.970074940370424e-06, "loss": 0.7653409242630005, "step": 11944 }, { "epoch": 14.656441717791411, "grad_norm": 0.2748664915561676, "learning_rate": 8.966223678934368e-06, "loss": 0.44749927520751953, "step": 11945 }, { "epoch": 14.657668711656441, "grad_norm": 0.38597074151039124, "learning_rate": 8.962373063774107e-06, "loss": 0.36699196696281433, "step": 11946 }, { "epoch": 14.658895705521472, "grad_norm": 0.2427927702665329, "learning_rate": 8.958523095044846e-06, "loss": 0.6874947547912598, "step": 11947 }, { "epoch": 14.660122699386504, "grad_norm": 0.30947041511535645, "learning_rate": 8.954673772901776e-06, "loss": 0.5867311358451843, "step": 11948 }, { "epoch": 14.661349693251534, "grad_norm": 0.2587636411190033, "learning_rate": 8.950825097500034e-06, "loss": 0.4071202576160431, "step": 11949 }, { "epoch": 14.662576687116564, "grad_norm": 0.2545776963233948, "learning_rate": 8.94697706899476e-06, "loss": 0.4358859956264496, "step": 11950 }, { "epoch": 14.663803680981594, "grad_norm": 0.3670925199985504, "learning_rate": 8.943129687541055e-06, "loss": 0.416217565536499, "step": 11951 }, { "epoch": 14.665030674846626, "grad_norm": 0.2623029053211212, "learning_rate": 8.939282953293998e-06, "loss": 0.6689559817314148, "step": 11952 }, { "epoch": 14.666257668711657, "grad_norm": 0.21686673164367676, "learning_rate": 8.935436866408644e-06, "loss": 0.4938969612121582, "step": 11953 }, { "epoch": 14.667484662576687, "grad_norm": 0.31885236501693726, "learning_rate": 8.931591427040007e-06, "loss": 0.6270241141319275, "step": 11954 }, { "epoch": 14.668711656441717, "grad_norm": 0.2514720857143402, "learning_rate": 8.92774663534309e-06, "loss": 0.6202991008758545, "step": 11955 }, { "epoch": 14.669938650306749, "grad_norm": 0.32219037413597107, "learning_rate": 8.923902491472858e-06, "loss": 0.5943511724472046, "step": 11956 }, { "epoch": 14.67116564417178, "grad_norm": 0.2710893750190735, "learning_rate": 8.920058995584282e-06, "loss": 0.765148401260376, "step": 11957 }, { "epoch": 14.67239263803681, "grad_norm": 0.27875903248786926, "learning_rate": 8.91621614783226e-06, "loss": 0.6931391954421997, "step": 11958 }, { "epoch": 14.67361963190184, "grad_norm": 0.26499465107917786, "learning_rate": 8.912373948371692e-06, "loss": 0.5845295190811157, "step": 11959 }, { "epoch": 14.674846625766872, "grad_norm": 0.2820885181427002, "learning_rate": 8.908532397357444e-06, "loss": 0.8758381605148315, "step": 11960 }, { "epoch": 14.676073619631902, "grad_norm": 0.26982149481773376, "learning_rate": 8.904691494944365e-06, "loss": 0.667487621307373, "step": 11961 }, { "epoch": 14.677300613496932, "grad_norm": 0.27772247791290283, "learning_rate": 8.900851241287273e-06, "loss": 0.5508886575698853, "step": 11962 }, { "epoch": 14.678527607361962, "grad_norm": 0.27263346314430237, "learning_rate": 8.897011636540945e-06, "loss": 0.658994197845459, "step": 11963 }, { "epoch": 14.679754601226994, "grad_norm": 0.2981659471988678, "learning_rate": 8.893172680860148e-06, "loss": 0.6212149858474731, "step": 11964 }, { "epoch": 14.680981595092025, "grad_norm": 0.25501516461372375, "learning_rate": 8.889334374399621e-06, "loss": 0.8075821399688721, "step": 11965 }, { "epoch": 14.682208588957055, "grad_norm": 0.24958159029483795, "learning_rate": 8.885496717314079e-06, "loss": 0.623337984085083, "step": 11966 }, { "epoch": 14.683435582822085, "grad_norm": 0.3176383376121521, "learning_rate": 8.881659709758213e-06, "loss": 0.7084196209907532, "step": 11967 }, { "epoch": 14.684662576687117, "grad_norm": 0.30679386854171753, "learning_rate": 8.877823351886663e-06, "loss": 0.652012825012207, "step": 11968 }, { "epoch": 14.685889570552147, "grad_norm": 0.27538931369781494, "learning_rate": 8.873987643854073e-06, "loss": 0.5496338605880737, "step": 11969 }, { "epoch": 14.687116564417177, "grad_norm": 0.30625295639038086, "learning_rate": 8.870152585815048e-06, "loss": 0.6741921901702881, "step": 11970 }, { "epoch": 14.68834355828221, "grad_norm": 0.2716098725795746, "learning_rate": 8.866318177924167e-06, "loss": 0.6223552227020264, "step": 11971 }, { "epoch": 14.68957055214724, "grad_norm": 0.3437892198562622, "learning_rate": 8.862484420335995e-06, "loss": 0.5963851809501648, "step": 11972 }, { "epoch": 14.69079754601227, "grad_norm": 0.2330833524465561, "learning_rate": 8.858651313205043e-06, "loss": 0.5452078580856323, "step": 11973 }, { "epoch": 14.6920245398773, "grad_norm": 0.26560452580451965, "learning_rate": 8.85481885668582e-06, "loss": 0.5523762702941895, "step": 11974 }, { "epoch": 14.69325153374233, "grad_norm": 0.289035826921463, "learning_rate": 8.850987050932799e-06, "loss": 0.410727858543396, "step": 11975 }, { "epoch": 14.694478527607362, "grad_norm": 0.29060590267181396, "learning_rate": 8.847155896100433e-06, "loss": 0.8373019695281982, "step": 11976 }, { "epoch": 14.695705521472393, "grad_norm": 0.2853033244609833, "learning_rate": 8.843325392343155e-06, "loss": 0.6394253969192505, "step": 11977 }, { "epoch": 14.696932515337423, "grad_norm": 0.3011186122894287, "learning_rate": 8.83949553981533e-06, "loss": 0.6107190251350403, "step": 11978 }, { "epoch": 14.698159509202455, "grad_norm": 0.2844618856906891, "learning_rate": 8.835666338671356e-06, "loss": 0.648176908493042, "step": 11979 }, { "epoch": 14.699386503067485, "grad_norm": 0.2612442374229431, "learning_rate": 8.831837789065572e-06, "loss": 0.7035700082778931, "step": 11980 }, { "epoch": 14.700613496932515, "grad_norm": 0.2552800178527832, "learning_rate": 8.8280098911523e-06, "loss": 0.5187544822692871, "step": 11981 }, { "epoch": 14.701840490797546, "grad_norm": 0.2492353618144989, "learning_rate": 8.824182645085819e-06, "loss": 0.4440148174762726, "step": 11982 }, { "epoch": 14.703067484662578, "grad_norm": 0.27020078897476196, "learning_rate": 8.8203560510204e-06, "loss": 0.7266055941581726, "step": 11983 }, { "epoch": 14.704294478527608, "grad_norm": 0.31740450859069824, "learning_rate": 8.816530109110283e-06, "loss": 0.8088156580924988, "step": 11984 }, { "epoch": 14.705521472392638, "grad_norm": 0.28876879811286926, "learning_rate": 8.81270481950968e-06, "loss": 0.6168429851531982, "step": 11985 }, { "epoch": 14.706748466257668, "grad_norm": 0.2864735424518585, "learning_rate": 8.808880182372787e-06, "loss": 0.6451001167297363, "step": 11986 }, { "epoch": 14.7079754601227, "grad_norm": 0.28919997811317444, "learning_rate": 8.805056197853748e-06, "loss": 0.6122536659240723, "step": 11987 }, { "epoch": 14.70920245398773, "grad_norm": 0.25178951025009155, "learning_rate": 8.801232866106702e-06, "loss": 0.7214939594268799, "step": 11988 }, { "epoch": 14.71042944785276, "grad_norm": 0.28261008858680725, "learning_rate": 8.79741018728576e-06, "loss": 0.6077122688293457, "step": 11989 }, { "epoch": 14.71165644171779, "grad_norm": 0.3141306936740875, "learning_rate": 8.793588161545003e-06, "loss": 0.63170325756073, "step": 11990 }, { "epoch": 14.712883435582823, "grad_norm": 0.2368607372045517, "learning_rate": 8.789766789038492e-06, "loss": 0.5772450566291809, "step": 11991 }, { "epoch": 14.714110429447853, "grad_norm": 0.2938065826892853, "learning_rate": 8.78594606992024e-06, "loss": 0.4085332155227661, "step": 11992 }, { "epoch": 14.715337423312883, "grad_norm": 0.2569686770439148, "learning_rate": 8.782126004344257e-06, "loss": 0.7981553077697754, "step": 11993 }, { "epoch": 14.716564417177914, "grad_norm": 0.2756098806858063, "learning_rate": 8.778306592464522e-06, "loss": 0.5718073844909668, "step": 11994 }, { "epoch": 14.717791411042946, "grad_norm": 0.24123543500900269, "learning_rate": 8.774487834434984e-06, "loss": 0.45951807498931885, "step": 11995 }, { "epoch": 14.719018404907976, "grad_norm": 0.26200491189956665, "learning_rate": 8.770669730409569e-06, "loss": 0.7610708475112915, "step": 11996 }, { "epoch": 14.720245398773006, "grad_norm": 0.27051815390586853, "learning_rate": 8.766852280542165e-06, "loss": 0.6684575080871582, "step": 11997 }, { "epoch": 14.721472392638036, "grad_norm": 0.3345138728618622, "learning_rate": 8.763035484986645e-06, "loss": 0.6053003072738647, "step": 11998 }, { "epoch": 14.722699386503068, "grad_norm": 0.25149208307266235, "learning_rate": 8.759219343896857e-06, "loss": 0.6582318544387817, "step": 11999 }, { "epoch": 14.723926380368098, "grad_norm": 0.26512768864631653, "learning_rate": 8.755403857426617e-06, "loss": 0.5734290480613708, "step": 12000 }, { "epoch": 14.725153374233129, "grad_norm": 0.2788786292076111, "learning_rate": 8.751589025729714e-06, "loss": 0.6127840876579285, "step": 12001 }, { "epoch": 14.726380368098159, "grad_norm": 0.23273056745529175, "learning_rate": 8.747774848959918e-06, "loss": 0.5601441860198975, "step": 12002 }, { "epoch": 14.72760736196319, "grad_norm": 0.2797871530056, "learning_rate": 8.743961327270964e-06, "loss": 0.7137377262115479, "step": 12003 }, { "epoch": 14.728834355828221, "grad_norm": 0.2810499966144562, "learning_rate": 8.740148460816566e-06, "loss": 0.6422005891799927, "step": 12004 }, { "epoch": 14.730061349693251, "grad_norm": 0.2934170067310333, "learning_rate": 8.736336249750419e-06, "loss": 0.7719534039497375, "step": 12005 }, { "epoch": 14.731288343558282, "grad_norm": 0.2514602541923523, "learning_rate": 8.73252469422616e-06, "loss": 0.5206153392791748, "step": 12006 }, { "epoch": 14.732515337423314, "grad_norm": 0.2269064038991928, "learning_rate": 8.728713794397436e-06, "loss": 0.6055372953414917, "step": 12007 }, { "epoch": 14.733742331288344, "grad_norm": 0.26061245799064636, "learning_rate": 8.72490355041785e-06, "loss": 0.6537914872169495, "step": 12008 }, { "epoch": 14.734969325153374, "grad_norm": 0.23045778274536133, "learning_rate": 8.721093962440988e-06, "loss": 0.4610390067100525, "step": 12009 }, { "epoch": 14.736196319018404, "grad_norm": 0.28606536984443665, "learning_rate": 8.717285030620403e-06, "loss": 0.7243993282318115, "step": 12010 }, { "epoch": 14.737423312883436, "grad_norm": 0.2513737082481384, "learning_rate": 8.713476755109615e-06, "loss": 0.6363238096237183, "step": 12011 }, { "epoch": 14.738650306748466, "grad_norm": 0.272773414850235, "learning_rate": 8.709669136062127e-06, "loss": 0.6005602478981018, "step": 12012 }, { "epoch": 14.739877300613497, "grad_norm": 0.2755280137062073, "learning_rate": 8.705862173631412e-06, "loss": 0.5413097143173218, "step": 12013 }, { "epoch": 14.741104294478527, "grad_norm": 0.31283485889434814, "learning_rate": 8.702055867970924e-06, "loss": 0.4460334777832031, "step": 12014 }, { "epoch": 14.742331288343559, "grad_norm": 0.3042634129524231, "learning_rate": 8.698250219234086e-06, "loss": 0.3511772155761719, "step": 12015 }, { "epoch": 14.743558282208589, "grad_norm": 0.282568097114563, "learning_rate": 8.694445227574282e-06, "loss": 0.32779479026794434, "step": 12016 }, { "epoch": 14.74478527607362, "grad_norm": 0.23323121666908264, "learning_rate": 8.690640893144883e-06, "loss": 0.7630026340484619, "step": 12017 }, { "epoch": 14.74601226993865, "grad_norm": 0.26178300380706787, "learning_rate": 8.686837216099236e-06, "loss": 0.6507306098937988, "step": 12018 }, { "epoch": 14.747239263803682, "grad_norm": 0.26581013202667236, "learning_rate": 8.683034196590653e-06, "loss": 0.7909166812896729, "step": 12019 }, { "epoch": 14.748466257668712, "grad_norm": 0.23817336559295654, "learning_rate": 8.679231834772433e-06, "loss": 0.5628496408462524, "step": 12020 }, { "epoch": 14.749693251533742, "grad_norm": 0.2530103027820587, "learning_rate": 8.675430130797824e-06, "loss": 0.7089483737945557, "step": 12021 }, { "epoch": 14.750920245398772, "grad_norm": 0.25840452313423157, "learning_rate": 8.671629084820063e-06, "loss": 0.5284287333488464, "step": 12022 }, { "epoch": 14.752147239263804, "grad_norm": 0.29518958926200867, "learning_rate": 8.667828696992359e-06, "loss": 0.5264807343482971, "step": 12023 }, { "epoch": 14.753374233128834, "grad_norm": 0.2676773965358734, "learning_rate": 8.664028967467914e-06, "loss": 0.6781866550445557, "step": 12024 }, { "epoch": 14.754601226993865, "grad_norm": 0.2681151032447815, "learning_rate": 8.66022989639986e-06, "loss": 0.9125070571899414, "step": 12025 }, { "epoch": 14.755828220858895, "grad_norm": 0.26744765043258667, "learning_rate": 8.656431483941338e-06, "loss": 0.6452380418777466, "step": 12026 }, { "epoch": 14.757055214723927, "grad_norm": 0.26599234342575073, "learning_rate": 8.652633730245448e-06, "loss": 0.6553525924682617, "step": 12027 }, { "epoch": 14.758282208588957, "grad_norm": 0.24575835466384888, "learning_rate": 8.648836635465272e-06, "loss": 0.7478089332580566, "step": 12028 }, { "epoch": 14.759509202453987, "grad_norm": 0.25417405366897583, "learning_rate": 8.64504019975386e-06, "loss": 0.6809766292572021, "step": 12029 }, { "epoch": 14.76073619631902, "grad_norm": 0.2670629918575287, "learning_rate": 8.641244423264225e-06, "loss": 0.42072582244873047, "step": 12030 }, { "epoch": 14.76196319018405, "grad_norm": 0.3400450646877289, "learning_rate": 8.637449306149373e-06, "loss": 0.6884384751319885, "step": 12031 }, { "epoch": 14.76319018404908, "grad_norm": 0.2771473228931427, "learning_rate": 8.63365484856227e-06, "loss": 0.6101157069206238, "step": 12032 }, { "epoch": 14.76441717791411, "grad_norm": 0.24237844347953796, "learning_rate": 8.62986105065586e-06, "loss": 0.5109733939170837, "step": 12033 }, { "epoch": 14.76564417177914, "grad_norm": 0.276132196187973, "learning_rate": 8.626067912583064e-06, "loss": 0.4984630048274994, "step": 12034 }, { "epoch": 14.766871165644172, "grad_norm": 0.2733209729194641, "learning_rate": 8.622275434496779e-06, "loss": 0.7265174388885498, "step": 12035 }, { "epoch": 14.768098159509202, "grad_norm": 0.2918079197406769, "learning_rate": 8.618483616549852e-06, "loss": 0.5755734443664551, "step": 12036 }, { "epoch": 14.769325153374233, "grad_norm": 0.2771665155887604, "learning_rate": 8.61469245889513e-06, "loss": 0.6087185740470886, "step": 12037 }, { "epoch": 14.770552147239265, "grad_norm": 0.30790016055107117, "learning_rate": 8.610901961685421e-06, "loss": 0.7157363891601562, "step": 12038 }, { "epoch": 14.771779141104295, "grad_norm": 0.2582961916923523, "learning_rate": 8.60711212507351e-06, "loss": 0.7392315864562988, "step": 12039 }, { "epoch": 14.773006134969325, "grad_norm": 0.2510733902454376, "learning_rate": 8.603322949212164e-06, "loss": 0.5362374782562256, "step": 12040 }, { "epoch": 14.774233128834355, "grad_norm": 0.30017736554145813, "learning_rate": 8.5995344342541e-06, "loss": 0.40019354224205017, "step": 12041 }, { "epoch": 14.775460122699386, "grad_norm": 0.25406554341316223, "learning_rate": 8.595746580352024e-06, "loss": 0.6274434924125671, "step": 12042 }, { "epoch": 14.776687116564418, "grad_norm": 0.28478506207466125, "learning_rate": 8.59195938765862e-06, "loss": 0.5556448101997375, "step": 12043 }, { "epoch": 14.777914110429448, "grad_norm": 0.24870513379573822, "learning_rate": 8.588172856326532e-06, "loss": 0.6151476502418518, "step": 12044 }, { "epoch": 14.779141104294478, "grad_norm": 0.232038676738739, "learning_rate": 8.584386986508388e-06, "loss": 0.3309016823768616, "step": 12045 }, { "epoch": 14.78036809815951, "grad_norm": 0.26753076910972595, "learning_rate": 8.58060177835679e-06, "loss": 0.38631123304367065, "step": 12046 }, { "epoch": 14.78159509202454, "grad_norm": 0.23242995142936707, "learning_rate": 8.5768172320243e-06, "loss": 0.5546809434890747, "step": 12047 }, { "epoch": 14.78282208588957, "grad_norm": 0.2799045741558075, "learning_rate": 8.57303334766347e-06, "loss": 0.6195712089538574, "step": 12048 }, { "epoch": 14.7840490797546, "grad_norm": 0.9407786726951599, "learning_rate": 8.569250125426822e-06, "loss": 0.5381914377212524, "step": 12049 }, { "epoch": 14.785276073619633, "grad_norm": 0.2562386095523834, "learning_rate": 8.565467565466833e-06, "loss": 0.6357156038284302, "step": 12050 }, { "epoch": 14.786503067484663, "grad_norm": 0.21543309092521667, "learning_rate": 8.561685667935973e-06, "loss": 0.5453197956085205, "step": 12051 }, { "epoch": 14.787730061349693, "grad_norm": 0.2673155665397644, "learning_rate": 8.557904432986677e-06, "loss": 0.5855836272239685, "step": 12052 }, { "epoch": 14.788957055214723, "grad_norm": 0.2893446385860443, "learning_rate": 8.554123860771362e-06, "loss": 0.44134610891342163, "step": 12053 }, { "epoch": 14.790184049079755, "grad_norm": 0.2541184425354004, "learning_rate": 8.550343951442415e-06, "loss": 0.5374327301979065, "step": 12054 }, { "epoch": 14.791411042944786, "grad_norm": 0.2815825641155243, "learning_rate": 8.546564705152183e-06, "loss": 0.5987974405288696, "step": 12055 }, { "epoch": 14.792638036809816, "grad_norm": 0.28373071551322937, "learning_rate": 8.542786122052998e-06, "loss": 0.7268939018249512, "step": 12056 }, { "epoch": 14.793865030674846, "grad_norm": 0.3090057373046875, "learning_rate": 8.53900820229717e-06, "loss": 0.6569361686706543, "step": 12057 }, { "epoch": 14.795092024539878, "grad_norm": 0.23577429354190826, "learning_rate": 8.53523094603697e-06, "loss": 0.5518434047698975, "step": 12058 }, { "epoch": 14.796319018404908, "grad_norm": 0.24715402722358704, "learning_rate": 8.531454353424662e-06, "loss": 0.5239966511726379, "step": 12059 }, { "epoch": 14.797546012269938, "grad_norm": 0.24071088433265686, "learning_rate": 8.527678424612447e-06, "loss": 0.5973967909812927, "step": 12060 }, { "epoch": 14.798773006134969, "grad_norm": 0.29396310448646545, "learning_rate": 8.523903159752538e-06, "loss": 0.7441619634628296, "step": 12061 }, { "epoch": 14.8, "grad_norm": 0.3245292901992798, "learning_rate": 8.520128558997101e-06, "loss": 0.5116390585899353, "step": 12062 }, { "epoch": 14.801226993865031, "grad_norm": 0.29046475887298584, "learning_rate": 8.51635462249828e-06, "loss": 0.6369812488555908, "step": 12063 }, { "epoch": 14.802453987730061, "grad_norm": 0.2594591975212097, "learning_rate": 8.512581350408196e-06, "loss": 0.6710244417190552, "step": 12064 }, { "epoch": 14.803680981595091, "grad_norm": 0.29621466994285583, "learning_rate": 8.508808742878929e-06, "loss": 0.6068179607391357, "step": 12065 }, { "epoch": 14.804907975460123, "grad_norm": 0.2912151515483856, "learning_rate": 8.505036800062549e-06, "loss": 0.721489429473877, "step": 12066 }, { "epoch": 14.806134969325154, "grad_norm": 0.2970511317253113, "learning_rate": 8.501265522111079e-06, "loss": 0.3254286050796509, "step": 12067 }, { "epoch": 14.807361963190184, "grad_norm": 0.2772025763988495, "learning_rate": 8.497494909176556e-06, "loss": 0.6629770994186401, "step": 12068 }, { "epoch": 14.808588957055214, "grad_norm": 0.22415603697299957, "learning_rate": 8.493724961410942e-06, "loss": 0.5759689807891846, "step": 12069 }, { "epoch": 14.809815950920246, "grad_norm": 0.24580180644989014, "learning_rate": 8.489955678966193e-06, "loss": 0.39863598346710205, "step": 12070 }, { "epoch": 14.811042944785276, "grad_norm": 0.25541573762893677, "learning_rate": 8.486187061994246e-06, "loss": 0.5176928043365479, "step": 12071 }, { "epoch": 14.812269938650306, "grad_norm": 0.2639443576335907, "learning_rate": 8.482419110647e-06, "loss": 0.7356464862823486, "step": 12072 }, { "epoch": 14.813496932515337, "grad_norm": 0.22396975755691528, "learning_rate": 8.478651825076337e-06, "loss": 0.24342593550682068, "step": 12073 }, { "epoch": 14.814723926380369, "grad_norm": 0.23549909889698029, "learning_rate": 8.474885205434089e-06, "loss": 0.6200908422470093, "step": 12074 }, { "epoch": 14.815950920245399, "grad_norm": 0.2717152535915375, "learning_rate": 8.471119251872087e-06, "loss": 0.7774764895439148, "step": 12075 }, { "epoch": 14.81717791411043, "grad_norm": 0.2731487452983856, "learning_rate": 8.467353964542127e-06, "loss": 0.6121139526367188, "step": 12076 }, { "epoch": 14.81840490797546, "grad_norm": 0.24711120128631592, "learning_rate": 8.463589343595975e-06, "loss": 0.592678964138031, "step": 12077 }, { "epoch": 14.819631901840491, "grad_norm": 0.2825676500797272, "learning_rate": 8.459825389185386e-06, "loss": 0.5696126222610474, "step": 12078 }, { "epoch": 14.820858895705522, "grad_norm": 0.32290229201316833, "learning_rate": 8.456062101462051e-06, "loss": 0.5734421610832214, "step": 12079 }, { "epoch": 14.822085889570552, "grad_norm": 0.28514033555984497, "learning_rate": 8.452299480577666e-06, "loss": 0.6021701097488403, "step": 12080 }, { "epoch": 14.823312883435582, "grad_norm": 0.36396363377571106, "learning_rate": 8.448537526683894e-06, "loss": 0.6983008980751038, "step": 12081 }, { "epoch": 14.824539877300614, "grad_norm": 0.2599121630191803, "learning_rate": 8.44477623993237e-06, "loss": 0.5673072338104248, "step": 12082 }, { "epoch": 14.825766871165644, "grad_norm": 0.34335869550704956, "learning_rate": 8.441015620474704e-06, "loss": 0.4990951418876648, "step": 12083 }, { "epoch": 14.826993865030675, "grad_norm": 0.20772141218185425, "learning_rate": 8.437255668462466e-06, "loss": 0.38272625207901, "step": 12084 }, { "epoch": 14.828220858895705, "grad_norm": 0.30258622765541077, "learning_rate": 8.43349638404721e-06, "loss": 0.5487270355224609, "step": 12085 }, { "epoch": 14.829447852760737, "grad_norm": 0.25096407532691956, "learning_rate": 8.429737767380469e-06, "loss": 0.5510201454162598, "step": 12086 }, { "epoch": 14.830674846625767, "grad_norm": 0.2356766015291214, "learning_rate": 8.425979818613736e-06, "loss": 0.5849736332893372, "step": 12087 }, { "epoch": 14.831901840490797, "grad_norm": 0.3183380663394928, "learning_rate": 8.422222537898494e-06, "loss": 0.4693901538848877, "step": 12088 }, { "epoch": 14.833128834355827, "grad_norm": 0.26977309584617615, "learning_rate": 8.418465925386165e-06, "loss": 0.7100759744644165, "step": 12089 }, { "epoch": 14.83435582822086, "grad_norm": 0.27401214838027954, "learning_rate": 8.414709981228191e-06, "loss": 0.6805000901222229, "step": 12090 }, { "epoch": 14.83558282208589, "grad_norm": 0.28359586000442505, "learning_rate": 8.410954705575955e-06, "loss": 0.8265896439552307, "step": 12091 }, { "epoch": 14.83680981595092, "grad_norm": 0.3068745732307434, "learning_rate": 8.407200098580828e-06, "loss": 0.6539585590362549, "step": 12092 }, { "epoch": 14.83803680981595, "grad_norm": 0.2224583476781845, "learning_rate": 8.403446160394134e-06, "loss": 0.5153806209564209, "step": 12093 }, { "epoch": 14.839263803680982, "grad_norm": 0.23423142731189728, "learning_rate": 8.39969289116719e-06, "loss": 0.5083975791931152, "step": 12094 }, { "epoch": 14.840490797546012, "grad_norm": 0.34433993697166443, "learning_rate": 8.39594029105128e-06, "loss": 0.47569096088409424, "step": 12095 }, { "epoch": 14.841717791411043, "grad_norm": 0.2541719377040863, "learning_rate": 8.392188360197662e-06, "loss": 0.5424896478652954, "step": 12096 }, { "epoch": 14.842944785276075, "grad_norm": 0.27017346024513245, "learning_rate": 8.38843709875757e-06, "loss": 0.7838400602340698, "step": 12097 }, { "epoch": 14.844171779141105, "grad_norm": 0.2564573287963867, "learning_rate": 8.384686506882194e-06, "loss": 0.4304904341697693, "step": 12098 }, { "epoch": 14.845398773006135, "grad_norm": 0.24154114723205566, "learning_rate": 8.380936584722718e-06, "loss": 0.7267132997512817, "step": 12099 }, { "epoch": 14.846625766871165, "grad_norm": 0.3046126961708069, "learning_rate": 8.377187332430287e-06, "loss": 0.5872713327407837, "step": 12100 }, { "epoch": 14.847852760736195, "grad_norm": 0.32121801376342773, "learning_rate": 8.373438750156026e-06, "loss": 0.4348262548446655, "step": 12101 }, { "epoch": 14.849079754601227, "grad_norm": 0.28559860587120056, "learning_rate": 8.369690838051037e-06, "loss": 0.6707742214202881, "step": 12102 }, { "epoch": 14.850306748466258, "grad_norm": 0.27847233414649963, "learning_rate": 8.365943596266372e-06, "loss": 0.6719820499420166, "step": 12103 }, { "epoch": 14.851533742331288, "grad_norm": 0.246531143784523, "learning_rate": 8.362197024953075e-06, "loss": 0.48216256499290466, "step": 12104 }, { "epoch": 14.85276073619632, "grad_norm": 0.25139108300209045, "learning_rate": 8.358451124262165e-06, "loss": 0.4192243814468384, "step": 12105 }, { "epoch": 14.85398773006135, "grad_norm": 0.252527117729187, "learning_rate": 8.354705894344628e-06, "loss": 0.5945794582366943, "step": 12106 }, { "epoch": 14.85521472392638, "grad_norm": 0.2757382094860077, "learning_rate": 8.350961335351432e-06, "loss": 0.37044233083724976, "step": 12107 }, { "epoch": 14.85644171779141, "grad_norm": 0.2857295870780945, "learning_rate": 8.34721744743349e-06, "loss": 0.5927685499191284, "step": 12108 }, { "epoch": 14.857668711656443, "grad_norm": 0.26553934812545776, "learning_rate": 8.343474230741715e-06, "loss": 0.44295525550842285, "step": 12109 }, { "epoch": 14.858895705521473, "grad_norm": 0.25254493951797485, "learning_rate": 8.339731685426991e-06, "loss": 0.4489487409591675, "step": 12110 }, { "epoch": 14.860122699386503, "grad_norm": 0.33026793599128723, "learning_rate": 8.335989811640166e-06, "loss": 0.6925228834152222, "step": 12111 }, { "epoch": 14.861349693251533, "grad_norm": 0.2944170832633972, "learning_rate": 8.332248609532064e-06, "loss": 0.5092383623123169, "step": 12112 }, { "epoch": 14.862576687116565, "grad_norm": 0.2489902526140213, "learning_rate": 8.328508079253483e-06, "loss": 0.522442102432251, "step": 12113 }, { "epoch": 14.863803680981595, "grad_norm": 0.2816413938999176, "learning_rate": 8.324768220955193e-06, "loss": 0.7485986948013306, "step": 12114 }, { "epoch": 14.865030674846626, "grad_norm": 0.32956844568252563, "learning_rate": 8.321029034787936e-06, "loss": 0.8051059246063232, "step": 12115 }, { "epoch": 14.866257668711656, "grad_norm": 0.25656774640083313, "learning_rate": 8.317290520902437e-06, "loss": 0.5051002502441406, "step": 12116 }, { "epoch": 14.867484662576688, "grad_norm": 0.26777181029319763, "learning_rate": 8.313552679449369e-06, "loss": 0.5342955589294434, "step": 12117 }, { "epoch": 14.868711656441718, "grad_norm": 0.31329989433288574, "learning_rate": 8.3098155105794e-06, "loss": 0.6278095841407776, "step": 12118 }, { "epoch": 14.869938650306748, "grad_norm": 0.27368324995040894, "learning_rate": 8.306079014443166e-06, "loss": 0.4500773549079895, "step": 12119 }, { "epoch": 14.871165644171779, "grad_norm": 0.2943175137042999, "learning_rate": 8.302343191191275e-06, "loss": 0.5990756750106812, "step": 12120 }, { "epoch": 14.87239263803681, "grad_norm": 0.2730679214000702, "learning_rate": 8.298608040974315e-06, "loss": 0.49890345335006714, "step": 12121 }, { "epoch": 14.87361963190184, "grad_norm": 0.2605831027030945, "learning_rate": 8.294873563942824e-06, "loss": 0.5261369347572327, "step": 12122 }, { "epoch": 14.874846625766871, "grad_norm": 0.28900009393692017, "learning_rate": 8.291139760247333e-06, "loss": 0.5634433627128601, "step": 12123 }, { "epoch": 14.876073619631901, "grad_norm": 0.2513706684112549, "learning_rate": 8.287406630038344e-06, "loss": 0.5904403924942017, "step": 12124 }, { "epoch": 14.877300613496933, "grad_norm": 0.23232457041740417, "learning_rate": 8.283674173466329e-06, "loss": 0.591655969619751, "step": 12125 }, { "epoch": 14.878527607361963, "grad_norm": 0.35563036799430847, "learning_rate": 8.279942390681738e-06, "loss": 0.44638437032699585, "step": 12126 }, { "epoch": 14.879754601226994, "grad_norm": 0.26274555921554565, "learning_rate": 8.276211281834976e-06, "loss": 0.6857004165649414, "step": 12127 }, { "epoch": 14.880981595092024, "grad_norm": 0.2784801423549652, "learning_rate": 8.272480847076435e-06, "loss": 0.5221289992332458, "step": 12128 }, { "epoch": 14.882208588957056, "grad_norm": 0.253235399723053, "learning_rate": 8.268751086556486e-06, "loss": 0.6520178318023682, "step": 12129 }, { "epoch": 14.883435582822086, "grad_norm": 0.29885563254356384, "learning_rate": 8.265022000425463e-06, "loss": 0.5712480545043945, "step": 12130 }, { "epoch": 14.884662576687116, "grad_norm": 0.254719614982605, "learning_rate": 8.261293588833672e-06, "loss": 0.6196016073226929, "step": 12131 }, { "epoch": 14.885889570552147, "grad_norm": 0.2686510980129242, "learning_rate": 8.257565851931404e-06, "loss": 0.48412081599235535, "step": 12132 }, { "epoch": 14.887116564417179, "grad_norm": 0.2505896985530853, "learning_rate": 8.253838789868899e-06, "loss": 0.5646214485168457, "step": 12133 }, { "epoch": 14.888343558282209, "grad_norm": 0.325680136680603, "learning_rate": 8.250112402796381e-06, "loss": 0.4292070269584656, "step": 12134 }, { "epoch": 14.889570552147239, "grad_norm": 0.2586270272731781, "learning_rate": 8.24638669086407e-06, "loss": 0.6359429359436035, "step": 12135 }, { "epoch": 14.89079754601227, "grad_norm": 0.25537145137786865, "learning_rate": 8.242661654222134e-06, "loss": 0.651124119758606, "step": 12136 }, { "epoch": 14.892024539877301, "grad_norm": 0.2658279240131378, "learning_rate": 8.238937293020707e-06, "loss": 0.350715309381485, "step": 12137 }, { "epoch": 14.893251533742331, "grad_norm": 0.30998462438583374, "learning_rate": 8.235213607409911e-06, "loss": 0.7329114675521851, "step": 12138 }, { "epoch": 14.894478527607362, "grad_norm": 0.26685386896133423, "learning_rate": 8.231490597539843e-06, "loss": 0.4182831645011902, "step": 12139 }, { "epoch": 14.895705521472392, "grad_norm": 0.32446932792663574, "learning_rate": 8.227768263560561e-06, "loss": 0.6028509140014648, "step": 12140 }, { "epoch": 14.896932515337424, "grad_norm": 0.30590111017227173, "learning_rate": 8.224046605622115e-06, "loss": 0.6041759252548218, "step": 12141 }, { "epoch": 14.898159509202454, "grad_norm": 0.22672177851200104, "learning_rate": 8.220325623874497e-06, "loss": 0.5783698558807373, "step": 12142 }, { "epoch": 14.899386503067484, "grad_norm": 0.2577112913131714, "learning_rate": 8.216605318467694e-06, "loss": 0.5620772838592529, "step": 12143 }, { "epoch": 14.900613496932515, "grad_norm": 0.2601008415222168, "learning_rate": 8.212885689551661e-06, "loss": 0.6716060638427734, "step": 12144 }, { "epoch": 14.901840490797547, "grad_norm": 0.2883983254432678, "learning_rate": 8.20916673727633e-06, "loss": 0.8581181764602661, "step": 12145 }, { "epoch": 14.903067484662577, "grad_norm": 0.2680347263813019, "learning_rate": 8.205448461791606e-06, "loss": 0.5335216522216797, "step": 12146 }, { "epoch": 14.904294478527607, "grad_norm": 0.28915250301361084, "learning_rate": 8.201730863247348e-06, "loss": 0.6363865733146667, "step": 12147 }, { "epoch": 14.905521472392637, "grad_norm": 0.2618332505226135, "learning_rate": 8.198013941793405e-06, "loss": 0.6903671026229858, "step": 12148 }, { "epoch": 14.90674846625767, "grad_norm": 0.24763067066669464, "learning_rate": 8.194297697579604e-06, "loss": 0.7637999057769775, "step": 12149 }, { "epoch": 14.9079754601227, "grad_norm": 0.2607378363609314, "learning_rate": 8.19058213075573e-06, "loss": 0.6374833583831787, "step": 12150 }, { "epoch": 14.90920245398773, "grad_norm": 0.22813533246517181, "learning_rate": 8.186867241471556e-06, "loss": 0.6084478497505188, "step": 12151 }, { "epoch": 14.91042944785276, "grad_norm": 0.2529582679271698, "learning_rate": 8.183153029876803e-06, "loss": 0.6136919260025024, "step": 12152 }, { "epoch": 14.911656441717792, "grad_norm": 0.2743191123008728, "learning_rate": 8.179439496121189e-06, "loss": 0.44089382886886597, "step": 12153 }, { "epoch": 14.912883435582822, "grad_norm": 0.3132230341434479, "learning_rate": 8.175726640354392e-06, "loss": 0.6676274538040161, "step": 12154 }, { "epoch": 14.914110429447852, "grad_norm": 0.25862541794776917, "learning_rate": 8.172014462726072e-06, "loss": 0.6680113077163696, "step": 12155 }, { "epoch": 14.915337423312884, "grad_norm": 0.29938313364982605, "learning_rate": 8.168302963385855e-06, "loss": 0.6996031999588013, "step": 12156 }, { "epoch": 14.916564417177915, "grad_norm": 0.23471589386463165, "learning_rate": 8.164592142483338e-06, "loss": 0.6520950198173523, "step": 12157 }, { "epoch": 14.917791411042945, "grad_norm": 0.2506010830402374, "learning_rate": 8.160882000168096e-06, "loss": 0.8163925409317017, "step": 12158 }, { "epoch": 14.919018404907975, "grad_norm": 0.2504017949104309, "learning_rate": 8.157172536589677e-06, "loss": 0.7206507921218872, "step": 12159 }, { "epoch": 14.920245398773005, "grad_norm": 0.27162978053092957, "learning_rate": 8.1534637518976e-06, "loss": 0.7752219438552856, "step": 12160 }, { "epoch": 14.921472392638037, "grad_norm": 0.24336795508861542, "learning_rate": 8.149755646241344e-06, "loss": 0.5450709462165833, "step": 12161 }, { "epoch": 14.922699386503067, "grad_norm": 0.3246590197086334, "learning_rate": 8.14604821977038e-06, "loss": 0.5989950895309448, "step": 12162 }, { "epoch": 14.923926380368098, "grad_norm": 0.24257701635360718, "learning_rate": 8.142341472634146e-06, "loss": 0.7037353515625, "step": 12163 }, { "epoch": 14.92515337423313, "grad_norm": 0.3074709475040436, "learning_rate": 8.138635404982045e-06, "loss": 0.6847996711730957, "step": 12164 }, { "epoch": 14.92638036809816, "grad_norm": 0.2281048595905304, "learning_rate": 8.134930016963469e-06, "loss": 0.46839362382888794, "step": 12165 }, { "epoch": 14.92760736196319, "grad_norm": 0.2892107367515564, "learning_rate": 8.131225308727756e-06, "loss": 0.6098520755767822, "step": 12166 }, { "epoch": 14.92883435582822, "grad_norm": 0.2615659534931183, "learning_rate": 8.127521280424239e-06, "loss": 0.7155150175094604, "step": 12167 }, { "epoch": 14.93006134969325, "grad_norm": 0.3715873062610626, "learning_rate": 8.123817932202215e-06, "loss": 0.5058139562606812, "step": 12168 }, { "epoch": 14.931288343558283, "grad_norm": 0.2477559745311737, "learning_rate": 8.120115264210962e-06, "loss": 0.6840939521789551, "step": 12169 }, { "epoch": 14.932515337423313, "grad_norm": 0.2629622220993042, "learning_rate": 8.116413276599725e-06, "loss": 0.6164584755897522, "step": 12170 }, { "epoch": 14.933742331288343, "grad_norm": 0.24628101289272308, "learning_rate": 8.112711969517709e-06, "loss": 0.6398584842681885, "step": 12171 }, { "epoch": 14.934969325153375, "grad_norm": 0.27064988017082214, "learning_rate": 8.109011343114105e-06, "loss": 0.564244270324707, "step": 12172 }, { "epoch": 14.936196319018405, "grad_norm": 0.2820412516593933, "learning_rate": 8.105311397538085e-06, "loss": 0.5064666271209717, "step": 12173 }, { "epoch": 14.937423312883435, "grad_norm": 0.2453925907611847, "learning_rate": 8.101612132938773e-06, "loss": 0.5621531009674072, "step": 12174 }, { "epoch": 14.938650306748466, "grad_norm": 0.30744606256484985, "learning_rate": 8.097913549465294e-06, "loss": 0.9470994472503662, "step": 12175 }, { "epoch": 14.939877300613498, "grad_norm": 0.28462129831314087, "learning_rate": 8.0942156472667e-06, "loss": 0.6261320114135742, "step": 12176 }, { "epoch": 14.941104294478528, "grad_norm": 0.30766284465789795, "learning_rate": 8.090518426492059e-06, "loss": 0.5678890943527222, "step": 12177 }, { "epoch": 14.942331288343558, "grad_norm": 0.2551681697368622, "learning_rate": 8.086821887290383e-06, "loss": 0.5929213762283325, "step": 12178 }, { "epoch": 14.943558282208588, "grad_norm": 0.24620340764522552, "learning_rate": 8.083126029810698e-06, "loss": 0.4829927682876587, "step": 12179 }, { "epoch": 14.94478527607362, "grad_norm": 0.27435651421546936, "learning_rate": 8.079430854201944e-06, "loss": 0.6242481470108032, "step": 12180 }, { "epoch": 14.94601226993865, "grad_norm": 0.24950741231441498, "learning_rate": 8.075736360613072e-06, "loss": 0.5365420579910278, "step": 12181 }, { "epoch": 14.94723926380368, "grad_norm": 0.29477494955062866, "learning_rate": 8.072042549193002e-06, "loss": 0.55162113904953, "step": 12182 }, { "epoch": 14.948466257668711, "grad_norm": 0.2639874815940857, "learning_rate": 8.068349420090613e-06, "loss": 0.5882682204246521, "step": 12183 }, { "epoch": 14.949693251533743, "grad_norm": 0.25679561495780945, "learning_rate": 8.064656973454777e-06, "loss": 0.7031394243240356, "step": 12184 }, { "epoch": 14.950920245398773, "grad_norm": 0.3737792372703552, "learning_rate": 8.06096520943431e-06, "loss": 0.7003644704818726, "step": 12185 }, { "epoch": 14.952147239263804, "grad_norm": 0.3028466999530792, "learning_rate": 8.057274128178025e-06, "loss": 0.8097028732299805, "step": 12186 }, { "epoch": 14.953374233128834, "grad_norm": 0.2781236171722412, "learning_rate": 8.053583729834696e-06, "loss": 0.8125394582748413, "step": 12187 }, { "epoch": 14.954601226993866, "grad_norm": 0.2569591999053955, "learning_rate": 8.049894014553076e-06, "loss": 0.7329394817352295, "step": 12188 }, { "epoch": 14.955828220858896, "grad_norm": 0.26069340109825134, "learning_rate": 8.04620498248189e-06, "loss": 0.773414134979248, "step": 12189 }, { "epoch": 14.957055214723926, "grad_norm": 0.29635360836982727, "learning_rate": 8.04251663376982e-06, "loss": 0.5474892854690552, "step": 12190 }, { "epoch": 14.958282208588956, "grad_norm": 0.28257110714912415, "learning_rate": 8.038828968565543e-06, "loss": 0.5968579649925232, "step": 12191 }, { "epoch": 14.959509202453988, "grad_norm": 0.2523920238018036, "learning_rate": 8.03514198701769e-06, "loss": 0.4300342798233032, "step": 12192 }, { "epoch": 14.960736196319019, "grad_norm": 0.2708308696746826, "learning_rate": 8.031455689274883e-06, "loss": 0.5787079930305481, "step": 12193 }, { "epoch": 14.961963190184049, "grad_norm": 0.5532419681549072, "learning_rate": 8.02777007548571e-06, "loss": 0.6735219359397888, "step": 12194 }, { "epoch": 14.963190184049079, "grad_norm": 0.2617846131324768, "learning_rate": 8.02408514579871e-06, "loss": 0.6058697700500488, "step": 12195 }, { "epoch": 14.964417177914111, "grad_norm": 0.347329318523407, "learning_rate": 8.020400900362418e-06, "loss": 0.5507240295410156, "step": 12196 }, { "epoch": 14.965644171779141, "grad_norm": 0.3046357333660126, "learning_rate": 8.016717339325341e-06, "loss": 0.4608156085014343, "step": 12197 }, { "epoch": 14.966871165644172, "grad_norm": 0.30825722217559814, "learning_rate": 8.01303446283595e-06, "loss": 0.4795478582382202, "step": 12198 }, { "epoch": 14.968098159509202, "grad_norm": 0.2332584261894226, "learning_rate": 8.0093522710427e-06, "loss": 0.6264457702636719, "step": 12199 }, { "epoch": 14.969325153374234, "grad_norm": 0.282027006149292, "learning_rate": 8.005670764093992e-06, "loss": 0.442935049533844, "step": 12200 }, { "epoch": 14.970552147239264, "grad_norm": 0.2722181975841522, "learning_rate": 8.001989942138219e-06, "loss": 0.5282471179962158, "step": 12201 }, { "epoch": 14.971779141104294, "grad_norm": 0.242214173078537, "learning_rate": 7.998309805323758e-06, "loss": 0.6430724263191223, "step": 12202 }, { "epoch": 14.973006134969324, "grad_norm": 0.2605513334274292, "learning_rate": 7.994630353798949e-06, "loss": 0.7289800047874451, "step": 12203 }, { "epoch": 14.974233128834356, "grad_norm": 0.243565171957016, "learning_rate": 7.990951587712079e-06, "loss": 0.6480051279067993, "step": 12204 }, { "epoch": 14.975460122699387, "grad_norm": 0.341694712638855, "learning_rate": 7.98727350721144e-06, "loss": 0.4919290840625763, "step": 12205 }, { "epoch": 14.976687116564417, "grad_norm": 0.280248761177063, "learning_rate": 7.983596112445286e-06, "loss": 0.49252527952194214, "step": 12206 }, { "epoch": 14.977914110429447, "grad_norm": 0.23994027078151703, "learning_rate": 7.979919403561836e-06, "loss": 0.5295774936676025, "step": 12207 }, { "epoch": 14.979141104294479, "grad_norm": 0.29369959235191345, "learning_rate": 7.976243380709303e-06, "loss": 0.851479172706604, "step": 12208 }, { "epoch": 14.98036809815951, "grad_norm": 0.25122278928756714, "learning_rate": 7.972568044035838e-06, "loss": 0.6551274061203003, "step": 12209 }, { "epoch": 14.98159509202454, "grad_norm": 0.28454306721687317, "learning_rate": 7.96889339368959e-06, "loss": 0.3970775008201599, "step": 12210 }, { "epoch": 14.98282208588957, "grad_norm": 0.21086139976978302, "learning_rate": 7.96521942981868e-06, "loss": 0.3669244349002838, "step": 12211 }, { "epoch": 14.984049079754602, "grad_norm": 0.2544969618320465, "learning_rate": 7.961546152571184e-06, "loss": 0.9036043882369995, "step": 12212 }, { "epoch": 14.985276073619632, "grad_norm": 0.2555432915687561, "learning_rate": 7.957873562095177e-06, "loss": 0.6281977891921997, "step": 12213 }, { "epoch": 14.986503067484662, "grad_norm": 0.28406965732574463, "learning_rate": 7.954201658538676e-06, "loss": 0.5005377531051636, "step": 12214 }, { "epoch": 14.987730061349692, "grad_norm": 0.307982474565506, "learning_rate": 7.950530442049687e-06, "loss": 0.6217752695083618, "step": 12215 }, { "epoch": 14.988957055214724, "grad_norm": 0.25278300046920776, "learning_rate": 7.946859912776192e-06, "loss": 0.4836082458496094, "step": 12216 }, { "epoch": 14.990184049079755, "grad_norm": 0.28467926383018494, "learning_rate": 7.943190070866132e-06, "loss": 0.564477264881134, "step": 12217 }, { "epoch": 14.991411042944785, "grad_norm": 0.26643189787864685, "learning_rate": 7.939520916467444e-06, "loss": 0.6192678213119507, "step": 12218 }, { "epoch": 14.992638036809815, "grad_norm": 0.2718334496021271, "learning_rate": 7.935852449728005e-06, "loss": 0.5485998392105103, "step": 12219 }, { "epoch": 14.993865030674847, "grad_norm": 0.29451239109039307, "learning_rate": 7.932184670795679e-06, "loss": 0.6978896856307983, "step": 12220 }, { "epoch": 14.995092024539877, "grad_norm": 0.2327675223350525, "learning_rate": 7.928517579818312e-06, "loss": 0.5271817445755005, "step": 12221 }, { "epoch": 14.996319018404908, "grad_norm": 0.2449655532836914, "learning_rate": 7.924851176943714e-06, "loss": 0.5846239328384399, "step": 12222 }, { "epoch": 14.99754601226994, "grad_norm": 0.30818110704421997, "learning_rate": 7.921185462319663e-06, "loss": 0.6069141030311584, "step": 12223 }, { "epoch": 14.99877300613497, "grad_norm": 0.2899229824542999, "learning_rate": 7.917520436093916e-06, "loss": 0.5851218700408936, "step": 12224 }, { "epoch": 15.0, "grad_norm": 0.8154223561286926, "learning_rate": 7.913856098414202e-06, "loss": 0.7300482988357544, "step": 12225 }, { "epoch": 15.00122699386503, "grad_norm": 0.28227728605270386, "learning_rate": 7.910192449428217e-06, "loss": 0.5990380048751831, "step": 12226 }, { "epoch": 15.002453987730062, "grad_norm": 0.2387375384569168, "learning_rate": 7.906529489283638e-06, "loss": 0.45282286405563354, "step": 12227 }, { "epoch": 15.003680981595092, "grad_norm": 0.24534252285957336, "learning_rate": 7.902867218128096e-06, "loss": 0.5034437775611877, "step": 12228 }, { "epoch": 15.004907975460123, "grad_norm": 0.279374897480011, "learning_rate": 7.899205636109213e-06, "loss": 0.5841805338859558, "step": 12229 }, { "epoch": 15.006134969325153, "grad_norm": 0.19985799491405487, "learning_rate": 7.895544743374581e-06, "loss": 0.3025106191635132, "step": 12230 }, { "epoch": 15.007361963190185, "grad_norm": 0.24981063604354858, "learning_rate": 7.891884540071756e-06, "loss": 0.6402299404144287, "step": 12231 }, { "epoch": 15.008588957055215, "grad_norm": 0.3522179424762726, "learning_rate": 7.888225026348278e-06, "loss": 0.6299477815628052, "step": 12232 }, { "epoch": 15.009815950920245, "grad_norm": 0.23553360998630524, "learning_rate": 7.884566202351637e-06, "loss": 0.5623726844787598, "step": 12233 }, { "epoch": 15.011042944785276, "grad_norm": 0.22652213275432587, "learning_rate": 7.880908068229317e-06, "loss": 0.49042898416519165, "step": 12234 }, { "epoch": 15.012269938650308, "grad_norm": 0.2641263008117676, "learning_rate": 7.877250624128768e-06, "loss": 0.7895550727844238, "step": 12235 }, { "epoch": 15.013496932515338, "grad_norm": 0.27021917700767517, "learning_rate": 7.873593870197412e-06, "loss": 0.43387019634246826, "step": 12236 }, { "epoch": 15.014723926380368, "grad_norm": 0.2543054223060608, "learning_rate": 7.869937806582642e-06, "loss": 0.7091085910797119, "step": 12237 }, { "epoch": 15.015950920245398, "grad_norm": 0.27674296498298645, "learning_rate": 7.86628243343183e-06, "loss": 0.6436229944229126, "step": 12238 }, { "epoch": 15.01717791411043, "grad_norm": 0.2573181688785553, "learning_rate": 7.862627750892296e-06, "loss": 0.6144911646842957, "step": 12239 }, { "epoch": 15.01840490797546, "grad_norm": 0.25393185019493103, "learning_rate": 7.85897375911136e-06, "loss": 0.5712258815765381, "step": 12240 }, { "epoch": 15.01963190184049, "grad_norm": 0.2374788522720337, "learning_rate": 7.855320458236307e-06, "loss": 0.5509638786315918, "step": 12241 }, { "epoch": 15.020858895705521, "grad_norm": 0.2307787835597992, "learning_rate": 7.851667848414385e-06, "loss": 0.3081875443458557, "step": 12242 }, { "epoch": 15.022085889570553, "grad_norm": 0.3037867248058319, "learning_rate": 7.848015929792831e-06, "loss": 0.5597470998764038, "step": 12243 }, { "epoch": 15.023312883435583, "grad_norm": 0.2785622477531433, "learning_rate": 7.84436470251883e-06, "loss": 0.7590827941894531, "step": 12244 }, { "epoch": 15.024539877300613, "grad_norm": 0.27032405138015747, "learning_rate": 7.840714166739551e-06, "loss": 0.6080498695373535, "step": 12245 }, { "epoch": 15.025766871165644, "grad_norm": 0.2692570090293884, "learning_rate": 7.837064322602152e-06, "loss": 0.684985339641571, "step": 12246 }, { "epoch": 15.026993865030676, "grad_norm": 0.2559271454811096, "learning_rate": 7.833415170253747e-06, "loss": 0.552521824836731, "step": 12247 }, { "epoch": 15.028220858895706, "grad_norm": 0.28651559352874756, "learning_rate": 7.829766709841407e-06, "loss": 0.7302809953689575, "step": 12248 }, { "epoch": 15.029447852760736, "grad_norm": 0.2511884272098541, "learning_rate": 7.826118941512204e-06, "loss": 0.6040165424346924, "step": 12249 }, { "epoch": 15.030674846625766, "grad_norm": 0.26533418893814087, "learning_rate": 7.822471865413161e-06, "loss": 0.5615126490592957, "step": 12250 }, { "epoch": 15.031901840490798, "grad_norm": 0.28477737307548523, "learning_rate": 7.818825481691286e-06, "loss": 0.5799754858016968, "step": 12251 }, { "epoch": 15.033128834355828, "grad_norm": 0.27400389313697815, "learning_rate": 7.81517979049356e-06, "loss": 0.6619794368743896, "step": 12252 }, { "epoch": 15.034355828220859, "grad_norm": 0.22920869290828705, "learning_rate": 7.811534791966918e-06, "loss": 0.5353169441223145, "step": 12253 }, { "epoch": 15.035582822085889, "grad_norm": 0.27470627427101135, "learning_rate": 7.807890486258287e-06, "loss": 0.6742203235626221, "step": 12254 }, { "epoch": 15.036809815950921, "grad_norm": 0.2528253197669983, "learning_rate": 7.804246873514553e-06, "loss": 0.710635781288147, "step": 12255 }, { "epoch": 15.038036809815951, "grad_norm": 0.23221014440059662, "learning_rate": 7.800603953882587e-06, "loss": 0.5954753160476685, "step": 12256 }, { "epoch": 15.039263803680981, "grad_norm": 0.274739533662796, "learning_rate": 7.796961727509231e-06, "loss": 0.7580738663673401, "step": 12257 }, { "epoch": 15.040490797546012, "grad_norm": 0.27759042382240295, "learning_rate": 7.79332019454127e-06, "loss": 0.6974427700042725, "step": 12258 }, { "epoch": 15.041717791411044, "grad_norm": 0.2544759511947632, "learning_rate": 7.7896793551255e-06, "loss": 0.5894454717636108, "step": 12259 }, { "epoch": 15.042944785276074, "grad_norm": 0.2663516402244568, "learning_rate": 7.786039209408672e-06, "loss": 0.6762447357177734, "step": 12260 }, { "epoch": 15.044171779141104, "grad_norm": 0.25052186846733093, "learning_rate": 7.782399757537503e-06, "loss": 0.42661988735198975, "step": 12261 }, { "epoch": 15.045398773006134, "grad_norm": 0.2570938169956207, "learning_rate": 7.778760999658704e-06, "loss": 0.7657740116119385, "step": 12262 }, { "epoch": 15.046625766871166, "grad_norm": 0.2803470492362976, "learning_rate": 7.775122935918924e-06, "loss": 0.5367400050163269, "step": 12263 }, { "epoch": 15.047852760736196, "grad_norm": 0.2885693907737732, "learning_rate": 7.771485566464812e-06, "loss": 0.722505509853363, "step": 12264 }, { "epoch": 15.049079754601227, "grad_norm": 0.23130568861961365, "learning_rate": 7.76784889144298e-06, "loss": 0.671754002571106, "step": 12265 }, { "epoch": 15.050306748466257, "grad_norm": 0.2567044198513031, "learning_rate": 7.76421291100001e-06, "loss": 0.6230051517486572, "step": 12266 }, { "epoch": 15.051533742331289, "grad_norm": 0.27998024225234985, "learning_rate": 7.760577625282461e-06, "loss": 0.6192538738250732, "step": 12267 }, { "epoch": 15.05276073619632, "grad_norm": 0.2571423351764679, "learning_rate": 7.75694303443686e-06, "loss": 0.7289118766784668, "step": 12268 }, { "epoch": 15.05398773006135, "grad_norm": 0.2729181945323944, "learning_rate": 7.753309138609705e-06, "loss": 0.5770021677017212, "step": 12269 }, { "epoch": 15.05521472392638, "grad_norm": 0.2378392070531845, "learning_rate": 7.74967593794747e-06, "loss": 0.6304937601089478, "step": 12270 }, { "epoch": 15.056441717791412, "grad_norm": 0.2728478014469147, "learning_rate": 7.746043432596609e-06, "loss": 0.5643123984336853, "step": 12271 }, { "epoch": 15.057668711656442, "grad_norm": 0.32177844643592834, "learning_rate": 7.742411622703516e-06, "loss": 0.5473086833953857, "step": 12272 }, { "epoch": 15.058895705521472, "grad_norm": 0.2590107321739197, "learning_rate": 7.738780508414592e-06, "loss": 0.6582019329071045, "step": 12273 }, { "epoch": 15.060122699386502, "grad_norm": 0.2716086804866791, "learning_rate": 7.735150089876194e-06, "loss": 0.6931207180023193, "step": 12274 }, { "epoch": 15.061349693251534, "grad_norm": 0.321239173412323, "learning_rate": 7.731520367234656e-06, "loss": 0.6437380313873291, "step": 12275 }, { "epoch": 15.062576687116565, "grad_norm": 0.2703332304954529, "learning_rate": 7.727891340636287e-06, "loss": 0.559876024723053, "step": 12276 }, { "epoch": 15.063803680981595, "grad_norm": 0.2555783689022064, "learning_rate": 7.72426301022735e-06, "loss": 0.542984127998352, "step": 12277 }, { "epoch": 15.065030674846625, "grad_norm": 0.2636161744594574, "learning_rate": 7.7206353761541e-06, "loss": 0.6335946321487427, "step": 12278 }, { "epoch": 15.066257668711657, "grad_norm": 0.2671259343624115, "learning_rate": 7.717008438562753e-06, "loss": 0.7676749229431152, "step": 12279 }, { "epoch": 15.067484662576687, "grad_norm": 0.2576686143875122, "learning_rate": 7.713382197599506e-06, "loss": 0.63529372215271, "step": 12280 }, { "epoch": 15.068711656441717, "grad_norm": 0.27843403816223145, "learning_rate": 7.709756653410528e-06, "loss": 0.5903782844543457, "step": 12281 }, { "epoch": 15.069938650306748, "grad_norm": 0.25083643198013306, "learning_rate": 7.706131806141936e-06, "loss": 0.6735208034515381, "step": 12282 }, { "epoch": 15.07116564417178, "grad_norm": 0.32204851508140564, "learning_rate": 7.702507655939848e-06, "loss": 0.7121139764785767, "step": 12283 }, { "epoch": 15.07239263803681, "grad_norm": 0.3063693344593048, "learning_rate": 7.69888420295034e-06, "loss": 0.8774237632751465, "step": 12284 }, { "epoch": 15.07361963190184, "grad_norm": 0.3124667704105377, "learning_rate": 7.695261447319469e-06, "loss": 0.4544978737831116, "step": 12285 }, { "epoch": 15.07484662576687, "grad_norm": 0.29122674465179443, "learning_rate": 7.691639389193261e-06, "loss": 0.6442068815231323, "step": 12286 }, { "epoch": 15.076073619631902, "grad_norm": 0.2703547477722168, "learning_rate": 7.688018028717697e-06, "loss": 0.6434107422828674, "step": 12287 }, { "epoch": 15.077300613496933, "grad_norm": 0.23580387234687805, "learning_rate": 7.684397366038748e-06, "loss": 0.5764968395233154, "step": 12288 }, { "epoch": 15.078527607361963, "grad_norm": 0.25639376044273376, "learning_rate": 7.68077740130235e-06, "loss": 0.6088218688964844, "step": 12289 }, { "epoch": 15.079754601226995, "grad_norm": 0.24291837215423584, "learning_rate": 7.677158134654435e-06, "loss": 0.5445847511291504, "step": 12290 }, { "epoch": 15.080981595092025, "grad_norm": 0.26545560359954834, "learning_rate": 7.673539566240861e-06, "loss": 0.6198870539665222, "step": 12291 }, { "epoch": 15.082208588957055, "grad_norm": 0.2546093165874481, "learning_rate": 7.669921696207491e-06, "loss": 0.7122088670730591, "step": 12292 }, { "epoch": 15.083435582822085, "grad_norm": 0.2767599821090698, "learning_rate": 7.666304524700149e-06, "loss": 0.5913940668106079, "step": 12293 }, { "epoch": 15.084662576687117, "grad_norm": 0.24934251606464386, "learning_rate": 7.662688051864636e-06, "loss": 0.5507916808128357, "step": 12294 }, { "epoch": 15.085889570552148, "grad_norm": 0.24912475049495697, "learning_rate": 7.659072277846727e-06, "loss": 0.6318250298500061, "step": 12295 }, { "epoch": 15.087116564417178, "grad_norm": 0.26840925216674805, "learning_rate": 7.65545720279215e-06, "loss": 0.5777208209037781, "step": 12296 }, { "epoch": 15.088343558282208, "grad_norm": 0.26361748576164246, "learning_rate": 7.651842826846622e-06, "loss": 0.6882588267326355, "step": 12297 }, { "epoch": 15.08957055214724, "grad_norm": 0.28895214200019836, "learning_rate": 7.648229150155833e-06, "loss": 0.5429222583770752, "step": 12298 }, { "epoch": 15.09079754601227, "grad_norm": 0.294931560754776, "learning_rate": 7.64461617286544e-06, "loss": 0.629493772983551, "step": 12299 }, { "epoch": 15.0920245398773, "grad_norm": 0.2997985780239105, "learning_rate": 7.641003895121076e-06, "loss": 0.7439254522323608, "step": 12300 }, { "epoch": 15.09325153374233, "grad_norm": 0.26402485370635986, "learning_rate": 7.63739231706833e-06, "loss": 0.6547673344612122, "step": 12301 }, { "epoch": 15.094478527607363, "grad_norm": 0.3186984360218048, "learning_rate": 7.633781438852775e-06, "loss": 0.5636470317840576, "step": 12302 }, { "epoch": 15.095705521472393, "grad_norm": 0.2801607847213745, "learning_rate": 7.630171260619965e-06, "loss": 0.6403683423995972, "step": 12303 }, { "epoch": 15.096932515337423, "grad_norm": 0.31686773896217346, "learning_rate": 7.6265617825154064e-06, "loss": 0.6771284341812134, "step": 12304 }, { "epoch": 15.098159509202453, "grad_norm": 0.30475080013275146, "learning_rate": 7.622953004684605e-06, "loss": 0.6425697207450867, "step": 12305 }, { "epoch": 15.099386503067485, "grad_norm": 0.2923775911331177, "learning_rate": 7.619344927272995e-06, "loss": 0.6282086968421936, "step": 12306 }, { "epoch": 15.100613496932516, "grad_norm": 0.5289658904075623, "learning_rate": 7.615737550426022e-06, "loss": 0.7906070947647095, "step": 12307 }, { "epoch": 15.101840490797546, "grad_norm": 0.287281334400177, "learning_rate": 7.612130874289086e-06, "loss": 0.5471592545509338, "step": 12308 }, { "epoch": 15.103067484662576, "grad_norm": 0.25119534134864807, "learning_rate": 7.608524899007563e-06, "loss": 0.5320632457733154, "step": 12309 }, { "epoch": 15.104294478527608, "grad_norm": 0.29128438234329224, "learning_rate": 7.604919624726808e-06, "loss": 0.747635006904602, "step": 12310 }, { "epoch": 15.105521472392638, "grad_norm": 0.26822370290756226, "learning_rate": 7.60131505159212e-06, "loss": 0.8162224292755127, "step": 12311 }, { "epoch": 15.106748466257669, "grad_norm": 0.29253530502319336, "learning_rate": 7.597711179748795e-06, "loss": 0.5672475099563599, "step": 12312 }, { "epoch": 15.107975460122699, "grad_norm": 1.3505690097808838, "learning_rate": 7.594108009342104e-06, "loss": 0.6725649833679199, "step": 12313 }, { "epoch": 15.10920245398773, "grad_norm": 0.28683051466941833, "learning_rate": 7.590505540517287e-06, "loss": 0.6642078757286072, "step": 12314 }, { "epoch": 15.110429447852761, "grad_norm": 0.2572256922721863, "learning_rate": 7.586903773419529e-06, "loss": 0.7088650465011597, "step": 12315 }, { "epoch": 15.111656441717791, "grad_norm": 0.24967645108699799, "learning_rate": 7.583302708194018e-06, "loss": 0.7576503753662109, "step": 12316 }, { "epoch": 15.112883435582821, "grad_norm": 0.2496192455291748, "learning_rate": 7.579702344985898e-06, "loss": 0.5425200462341309, "step": 12317 }, { "epoch": 15.114110429447853, "grad_norm": 0.24713283777236938, "learning_rate": 7.5761026839402945e-06, "loss": 0.710542619228363, "step": 12318 }, { "epoch": 15.115337423312884, "grad_norm": 0.27744007110595703, "learning_rate": 7.572503725202304e-06, "loss": 0.7329202890396118, "step": 12319 }, { "epoch": 15.116564417177914, "grad_norm": 0.22480405867099762, "learning_rate": 7.5689054689169754e-06, "loss": 0.5798192620277405, "step": 12320 }, { "epoch": 15.117791411042944, "grad_norm": 0.22396109998226166, "learning_rate": 7.565307915229353e-06, "loss": 0.2869728207588196, "step": 12321 }, { "epoch": 15.119018404907976, "grad_norm": 0.3051031231880188, "learning_rate": 7.561711064284443e-06, "loss": 0.5151406526565552, "step": 12322 }, { "epoch": 15.120245398773006, "grad_norm": 0.2700635492801666, "learning_rate": 7.558114916227224e-06, "loss": 0.46841976046562195, "step": 12323 }, { "epoch": 15.121472392638037, "grad_norm": 0.2815941274166107, "learning_rate": 7.554519471202656e-06, "loss": 0.6820313930511475, "step": 12324 }, { "epoch": 15.122699386503067, "grad_norm": 0.27701476216316223, "learning_rate": 7.550924729355643e-06, "loss": 0.680799126625061, "step": 12325 }, { "epoch": 15.123926380368099, "grad_norm": 0.2472379505634308, "learning_rate": 7.547330690831089e-06, "loss": 0.7346881031990051, "step": 12326 }, { "epoch": 15.125153374233129, "grad_norm": 0.25230222940444946, "learning_rate": 7.543737355773856e-06, "loss": 0.5488269329071045, "step": 12327 }, { "epoch": 15.12638036809816, "grad_norm": 0.2656170427799225, "learning_rate": 7.540144724328785e-06, "loss": 0.6672239303588867, "step": 12328 }, { "epoch": 15.12760736196319, "grad_norm": 0.25909966230392456, "learning_rate": 7.536552796640692e-06, "loss": 0.6013014316558838, "step": 12329 }, { "epoch": 15.128834355828221, "grad_norm": 0.28002336621284485, "learning_rate": 7.532961572854338e-06, "loss": 0.7767571210861206, "step": 12330 }, { "epoch": 15.130061349693252, "grad_norm": 0.20789214968681335, "learning_rate": 7.529371053114489e-06, "loss": 0.4332241415977478, "step": 12331 }, { "epoch": 15.131288343558282, "grad_norm": 0.2723764181137085, "learning_rate": 7.5257812375658625e-06, "loss": 0.6277289390563965, "step": 12332 }, { "epoch": 15.132515337423312, "grad_norm": 0.22720810770988464, "learning_rate": 7.522192126353156e-06, "loss": 0.5321916341781616, "step": 12333 }, { "epoch": 15.133742331288344, "grad_norm": 0.26861774921417236, "learning_rate": 7.518603719621037e-06, "loss": 0.5675625801086426, "step": 12334 }, { "epoch": 15.134969325153374, "grad_norm": 0.29594770073890686, "learning_rate": 7.515016017514146e-06, "loss": 0.7963175773620605, "step": 12335 }, { "epoch": 15.136196319018405, "grad_norm": 0.23889802396297455, "learning_rate": 7.511429020177088e-06, "loss": 0.37953388690948486, "step": 12336 }, { "epoch": 15.137423312883435, "grad_norm": 0.23697781562805176, "learning_rate": 7.507842727754446e-06, "loss": 0.5350469946861267, "step": 12337 }, { "epoch": 15.138650306748467, "grad_norm": 0.25139620900154114, "learning_rate": 7.504257140390786e-06, "loss": 0.6986222267150879, "step": 12338 }, { "epoch": 15.139877300613497, "grad_norm": 0.2698340117931366, "learning_rate": 7.50067225823061e-06, "loss": 0.5429549217224121, "step": 12339 }, { "epoch": 15.141104294478527, "grad_norm": 0.220770463347435, "learning_rate": 7.497088081418427e-06, "loss": 0.46507951617240906, "step": 12340 }, { "epoch": 15.142331288343557, "grad_norm": 0.29543057084083557, "learning_rate": 7.493504610098703e-06, "loss": 0.5395705103874207, "step": 12341 }, { "epoch": 15.14355828220859, "grad_norm": 0.29568997025489807, "learning_rate": 7.4899218444158814e-06, "loss": 0.6971287131309509, "step": 12342 }, { "epoch": 15.14478527607362, "grad_norm": 0.26644042134284973, "learning_rate": 7.486339784514365e-06, "loss": 0.5840412974357605, "step": 12343 }, { "epoch": 15.14601226993865, "grad_norm": 0.2646333873271942, "learning_rate": 7.482758430538553e-06, "loss": 0.7515389919281006, "step": 12344 }, { "epoch": 15.14723926380368, "grad_norm": 0.265811949968338, "learning_rate": 7.479177782632779e-06, "loss": 0.658226490020752, "step": 12345 }, { "epoch": 15.148466257668712, "grad_norm": 0.3137977719306946, "learning_rate": 7.475597840941375e-06, "loss": 0.44742679595947266, "step": 12346 }, { "epoch": 15.149693251533742, "grad_norm": 0.24306799471378326, "learning_rate": 7.472018605608644e-06, "loss": 0.3665396273136139, "step": 12347 }, { "epoch": 15.150920245398773, "grad_norm": 0.28060340881347656, "learning_rate": 7.4684400767788515e-06, "loss": 0.5214630365371704, "step": 12348 }, { "epoch": 15.152147239263805, "grad_norm": 0.29781049489974976, "learning_rate": 7.464862254596244e-06, "loss": 0.7064821720123291, "step": 12349 }, { "epoch": 15.153374233128835, "grad_norm": 0.2891755998134613, "learning_rate": 7.4612851392050205e-06, "loss": 0.6931520700454712, "step": 12350 }, { "epoch": 15.154601226993865, "grad_norm": 0.24517124891281128, "learning_rate": 7.457708730749372e-06, "loss": 0.5633749961853027, "step": 12351 }, { "epoch": 15.155828220858895, "grad_norm": 0.2476765215396881, "learning_rate": 7.45413302937345e-06, "loss": 0.40576690435409546, "step": 12352 }, { "epoch": 15.157055214723927, "grad_norm": 0.27453547716140747, "learning_rate": 7.450558035221386e-06, "loss": 0.5399347543716431, "step": 12353 }, { "epoch": 15.158282208588957, "grad_norm": 0.27383872866630554, "learning_rate": 7.446983748437281e-06, "loss": 0.6643891334533691, "step": 12354 }, { "epoch": 15.159509202453988, "grad_norm": 0.26711800694465637, "learning_rate": 7.443410169165191e-06, "loss": 0.6680784225463867, "step": 12355 }, { "epoch": 15.160736196319018, "grad_norm": 0.25724393129348755, "learning_rate": 7.439837297549157e-06, "loss": 0.6837002038955688, "step": 12356 }, { "epoch": 15.16196319018405, "grad_norm": 0.2722293436527252, "learning_rate": 7.436265133733206e-06, "loss": 0.542007565498352, "step": 12357 }, { "epoch": 15.16319018404908, "grad_norm": 0.30582287907600403, "learning_rate": 7.432693677861319e-06, "loss": 0.45278117060661316, "step": 12358 }, { "epoch": 15.16441717791411, "grad_norm": 0.2698507606983185, "learning_rate": 7.429122930077442e-06, "loss": 0.5091054439544678, "step": 12359 }, { "epoch": 15.16564417177914, "grad_norm": 0.30864542722702026, "learning_rate": 7.425552890525506e-06, "loss": 0.7582444548606873, "step": 12360 }, { "epoch": 15.166871165644173, "grad_norm": 0.23753181099891663, "learning_rate": 7.4219835593494075e-06, "loss": 0.6767051815986633, "step": 12361 }, { "epoch": 15.168098159509203, "grad_norm": 0.26510316133499146, "learning_rate": 7.418414936693016e-06, "loss": 0.6167032718658447, "step": 12362 }, { "epoch": 15.169325153374233, "grad_norm": 0.2560417950153351, "learning_rate": 7.414847022700186e-06, "loss": 0.5512896776199341, "step": 12363 }, { "epoch": 15.170552147239263, "grad_norm": 0.2812911868095398, "learning_rate": 7.411279817514708e-06, "loss": 0.6996350288391113, "step": 12364 }, { "epoch": 15.171779141104295, "grad_norm": 0.2628202438354492, "learning_rate": 7.407713321280377e-06, "loss": 0.8051930665969849, "step": 12365 }, { "epoch": 15.173006134969325, "grad_norm": 0.276550829410553, "learning_rate": 7.404147534140945e-06, "loss": 0.6008024215698242, "step": 12366 }, { "epoch": 15.174233128834356, "grad_norm": 0.2361217439174652, "learning_rate": 7.400582456240143e-06, "loss": 0.5525553226470947, "step": 12367 }, { "epoch": 15.175460122699386, "grad_norm": 0.2766752243041992, "learning_rate": 7.397018087721674e-06, "loss": 0.6940187215805054, "step": 12368 }, { "epoch": 15.176687116564418, "grad_norm": 0.2561595141887665, "learning_rate": 7.393454428729194e-06, "loss": 0.6536808013916016, "step": 12369 }, { "epoch": 15.177914110429448, "grad_norm": 0.24918381869792938, "learning_rate": 7.389891479406349e-06, "loss": 0.5629763603210449, "step": 12370 }, { "epoch": 15.179141104294478, "grad_norm": 0.28802090883255005, "learning_rate": 7.386329239896753e-06, "loss": 0.49153780937194824, "step": 12371 }, { "epoch": 15.180368098159509, "grad_norm": 0.29722660779953003, "learning_rate": 7.382767710343991e-06, "loss": 0.7250961065292358, "step": 12372 }, { "epoch": 15.18159509202454, "grad_norm": 0.26445770263671875, "learning_rate": 7.379206890891624e-06, "loss": 0.7757737636566162, "step": 12373 }, { "epoch": 15.18282208588957, "grad_norm": 0.26607590913772583, "learning_rate": 7.375646781683163e-06, "loss": 0.6290223598480225, "step": 12374 }, { "epoch": 15.184049079754601, "grad_norm": 0.2415187805891037, "learning_rate": 7.372087382862114e-06, "loss": 0.6557188630104065, "step": 12375 }, { "epoch": 15.185276073619631, "grad_norm": 0.2742413282394409, "learning_rate": 7.368528694571944e-06, "loss": 0.5009097456932068, "step": 12376 }, { "epoch": 15.186503067484663, "grad_norm": 0.26500123739242554, "learning_rate": 7.3649707169560985e-06, "loss": 0.658852756023407, "step": 12377 }, { "epoch": 15.187730061349694, "grad_norm": 0.2756420373916626, "learning_rate": 7.361413450157986e-06, "loss": 0.6267030239105225, "step": 12378 }, { "epoch": 15.188957055214724, "grad_norm": 0.2545134127140045, "learning_rate": 7.357856894320992e-06, "loss": 0.6305005550384521, "step": 12379 }, { "epoch": 15.190184049079754, "grad_norm": 0.28942936658859253, "learning_rate": 7.354301049588469e-06, "loss": 0.43143802881240845, "step": 12380 }, { "epoch": 15.191411042944786, "grad_norm": 0.26659661531448364, "learning_rate": 7.35074591610374e-06, "loss": 0.7756381630897522, "step": 12381 }, { "epoch": 15.192638036809816, "grad_norm": 0.23533153533935547, "learning_rate": 7.347191494010119e-06, "loss": 0.6701220273971558, "step": 12382 }, { "epoch": 15.193865030674846, "grad_norm": 0.3125151991844177, "learning_rate": 7.343637783450852e-06, "loss": 0.6618349552154541, "step": 12383 }, { "epoch": 15.195092024539877, "grad_norm": 0.24868899583816528, "learning_rate": 7.340084784569188e-06, "loss": 0.6773483753204346, "step": 12384 }, { "epoch": 15.196319018404909, "grad_norm": 0.2981931269168854, "learning_rate": 7.336532497508336e-06, "loss": 0.6696873307228088, "step": 12385 }, { "epoch": 15.197546012269939, "grad_norm": 0.23525670170783997, "learning_rate": 7.3329809224114835e-06, "loss": 0.4781076908111572, "step": 12386 }, { "epoch": 15.198773006134969, "grad_norm": 0.2943425178527832, "learning_rate": 7.329430059421791e-06, "loss": 0.7083110809326172, "step": 12387 }, { "epoch": 15.2, "grad_norm": 0.257845938205719, "learning_rate": 7.325879908682365e-06, "loss": 0.6110409498214722, "step": 12388 }, { "epoch": 15.201226993865031, "grad_norm": 0.29992130398750305, "learning_rate": 7.3223304703363135e-06, "loss": 0.704526960849762, "step": 12389 }, { "epoch": 15.202453987730062, "grad_norm": 0.27307337522506714, "learning_rate": 7.318781744526701e-06, "loss": 0.550625205039978, "step": 12390 }, { "epoch": 15.203680981595092, "grad_norm": 0.24114899337291718, "learning_rate": 7.31523373139657e-06, "loss": 0.5648844242095947, "step": 12391 }, { "epoch": 15.204907975460122, "grad_norm": 0.28097397089004517, "learning_rate": 7.311686431088935e-06, "loss": 0.6224045753479004, "step": 12392 }, { "epoch": 15.206134969325154, "grad_norm": 0.34259214997291565, "learning_rate": 7.308139843746764e-06, "loss": 0.5205556154251099, "step": 12393 }, { "epoch": 15.207361963190184, "grad_norm": 0.2742466330528259, "learning_rate": 7.304593969513016e-06, "loss": 0.5895402431488037, "step": 12394 }, { "epoch": 15.208588957055214, "grad_norm": 0.2932404577732086, "learning_rate": 7.301048808530617e-06, "loss": 0.7073190212249756, "step": 12395 }, { "epoch": 15.209815950920245, "grad_norm": 0.29970964789390564, "learning_rate": 7.297504360942461e-06, "loss": 0.4031132757663727, "step": 12396 }, { "epoch": 15.211042944785277, "grad_norm": 0.2409334033727646, "learning_rate": 7.293960626891424e-06, "loss": 0.5861619710922241, "step": 12397 }, { "epoch": 15.212269938650307, "grad_norm": 0.26423919200897217, "learning_rate": 7.2904176065203275e-06, "loss": 0.672608494758606, "step": 12398 }, { "epoch": 15.213496932515337, "grad_norm": 0.22990870475769043, "learning_rate": 7.286875299971987e-06, "loss": 0.5671281218528748, "step": 12399 }, { "epoch": 15.214723926380367, "grad_norm": 0.2076493203639984, "learning_rate": 7.2833337073891835e-06, "loss": 0.42156168818473816, "step": 12400 }, { "epoch": 15.2159509202454, "grad_norm": 0.2578808665275574, "learning_rate": 7.279792828914669e-06, "loss": 0.6172522306442261, "step": 12401 }, { "epoch": 15.21717791411043, "grad_norm": 0.2743484377861023, "learning_rate": 7.276252664691166e-06, "loss": 0.6410616636276245, "step": 12402 }, { "epoch": 15.21840490797546, "grad_norm": 0.2698608636856079, "learning_rate": 7.272713214861368e-06, "loss": 0.5618435144424438, "step": 12403 }, { "epoch": 15.21963190184049, "grad_norm": 0.32010582089424133, "learning_rate": 7.269174479567942e-06, "loss": 0.8650197982788086, "step": 12404 }, { "epoch": 15.220858895705522, "grad_norm": 0.26132988929748535, "learning_rate": 7.2656364589535205e-06, "loss": 0.531437337398529, "step": 12405 }, { "epoch": 15.222085889570552, "grad_norm": 0.27477598190307617, "learning_rate": 7.262099153160723e-06, "loss": 0.6060389876365662, "step": 12406 }, { "epoch": 15.223312883435582, "grad_norm": 0.26628461480140686, "learning_rate": 7.25856256233211e-06, "loss": 0.673907995223999, "step": 12407 }, { "epoch": 15.224539877300613, "grad_norm": 0.2610754668712616, "learning_rate": 7.255026686610239e-06, "loss": 0.7017878293991089, "step": 12408 }, { "epoch": 15.225766871165645, "grad_norm": 0.27885890007019043, "learning_rate": 7.251491526137635e-06, "loss": 0.6630661487579346, "step": 12409 }, { "epoch": 15.226993865030675, "grad_norm": 0.26054683327674866, "learning_rate": 7.247957081056786e-06, "loss": 0.650179386138916, "step": 12410 }, { "epoch": 15.228220858895705, "grad_norm": 0.24621614813804626, "learning_rate": 7.244423351510163e-06, "loss": 0.7747174501419067, "step": 12411 }, { "epoch": 15.229447852760735, "grad_norm": 0.27873244881629944, "learning_rate": 7.240890337640188e-06, "loss": 0.5705885887145996, "step": 12412 }, { "epoch": 15.230674846625767, "grad_norm": 0.27380940318107605, "learning_rate": 7.23735803958927e-06, "loss": 0.6420834064483643, "step": 12413 }, { "epoch": 15.231901840490798, "grad_norm": 0.2579483389854431, "learning_rate": 7.2338264574997925e-06, "loss": 0.5188137292861938, "step": 12414 }, { "epoch": 15.233128834355828, "grad_norm": 0.24582886695861816, "learning_rate": 7.230295591514097e-06, "loss": 0.6007958054542542, "step": 12415 }, { "epoch": 15.23435582822086, "grad_norm": 0.23327940702438354, "learning_rate": 7.226765441774516e-06, "loss": 0.4772423505783081, "step": 12416 }, { "epoch": 15.23558282208589, "grad_norm": 0.2840683162212372, "learning_rate": 7.22323600842332e-06, "loss": 0.5674073100090027, "step": 12417 }, { "epoch": 15.23680981595092, "grad_norm": 0.26038655638694763, "learning_rate": 7.219707291602778e-06, "loss": 0.5688031911849976, "step": 12418 }, { "epoch": 15.23803680981595, "grad_norm": 0.31610769033432007, "learning_rate": 7.216179291455124e-06, "loss": 0.5882344841957092, "step": 12419 }, { "epoch": 15.239263803680982, "grad_norm": 0.2586122453212738, "learning_rate": 7.212652008122564e-06, "loss": 0.6319584846496582, "step": 12420 }, { "epoch": 15.240490797546013, "grad_norm": 0.314331591129303, "learning_rate": 7.2091254417472775e-06, "loss": 0.6317131519317627, "step": 12421 }, { "epoch": 15.241717791411043, "grad_norm": 0.28633853793144226, "learning_rate": 7.205599592471396e-06, "loss": 0.6966001987457275, "step": 12422 }, { "epoch": 15.242944785276073, "grad_norm": 0.2878642678260803, "learning_rate": 7.202074460437036e-06, "loss": 0.760841965675354, "step": 12423 }, { "epoch": 15.244171779141105, "grad_norm": 0.2813601791858673, "learning_rate": 7.198550045786301e-06, "loss": 0.3789204955101013, "step": 12424 }, { "epoch": 15.245398773006135, "grad_norm": 0.2686244547367096, "learning_rate": 7.195026348661249e-06, "loss": 0.5320967435836792, "step": 12425 }, { "epoch": 15.246625766871166, "grad_norm": 0.2753323018550873, "learning_rate": 7.191503369203897e-06, "loss": 0.728689432144165, "step": 12426 }, { "epoch": 15.247852760736196, "grad_norm": 0.2617837190628052, "learning_rate": 7.187981107556252e-06, "loss": 0.5335825085639954, "step": 12427 }, { "epoch": 15.249079754601228, "grad_norm": 0.3006690740585327, "learning_rate": 7.184459563860285e-06, "loss": 0.7432489395141602, "step": 12428 }, { "epoch": 15.250306748466258, "grad_norm": 0.2822488248348236, "learning_rate": 7.180938738257944e-06, "loss": 0.5481483936309814, "step": 12429 }, { "epoch": 15.251533742331288, "grad_norm": 0.2209480106830597, "learning_rate": 7.17741863089115e-06, "loss": 0.38872015476226807, "step": 12430 }, { "epoch": 15.252760736196318, "grad_norm": 0.24279791116714478, "learning_rate": 7.173899241901768e-06, "loss": 0.35126736760139465, "step": 12431 }, { "epoch": 15.25398773006135, "grad_norm": 0.3336370885372162, "learning_rate": 7.170380571431667e-06, "loss": 0.5246371626853943, "step": 12432 }, { "epoch": 15.25521472392638, "grad_norm": 0.3049771189689636, "learning_rate": 7.166862619622678e-06, "loss": 0.491152822971344, "step": 12433 }, { "epoch": 15.256441717791411, "grad_norm": 0.25522345304489136, "learning_rate": 7.163345386616591e-06, "loss": 0.651985228061676, "step": 12434 }, { "epoch": 15.257668711656441, "grad_norm": 0.26411232352256775, "learning_rate": 7.159828872555185e-06, "loss": 0.7051922082901001, "step": 12435 }, { "epoch": 15.258895705521473, "grad_norm": 0.30541783571243286, "learning_rate": 7.1563130775801925e-06, "loss": 0.6263314485549927, "step": 12436 }, { "epoch": 15.260122699386503, "grad_norm": 0.2960756719112396, "learning_rate": 7.152798001833325e-06, "loss": 0.4666706323623657, "step": 12437 }, { "epoch": 15.261349693251534, "grad_norm": 0.2695300281047821, "learning_rate": 7.14928364545627e-06, "loss": 0.43881142139434814, "step": 12438 }, { "epoch": 15.262576687116564, "grad_norm": 0.2532960772514343, "learning_rate": 7.14577000859068e-06, "loss": 0.6069026589393616, "step": 12439 }, { "epoch": 15.263803680981596, "grad_norm": 0.2680507302284241, "learning_rate": 7.142257091378177e-06, "loss": 0.47422394156455994, "step": 12440 }, { "epoch": 15.265030674846626, "grad_norm": 0.26714885234832764, "learning_rate": 7.138744893960367e-06, "loss": 0.6047227382659912, "step": 12441 }, { "epoch": 15.266257668711656, "grad_norm": 0.2914474904537201, "learning_rate": 7.135233416478801e-06, "loss": 0.7966474294662476, "step": 12442 }, { "epoch": 15.267484662576686, "grad_norm": 0.2636263370513916, "learning_rate": 7.131722659075024e-06, "loss": 0.7970821857452393, "step": 12443 }, { "epoch": 15.268711656441718, "grad_norm": 0.25348544120788574, "learning_rate": 7.128212621890546e-06, "loss": 0.34354740381240845, "step": 12444 }, { "epoch": 15.269938650306749, "grad_norm": 0.2565233111381531, "learning_rate": 7.124703305066843e-06, "loss": 0.5762424468994141, "step": 12445 }, { "epoch": 15.271165644171779, "grad_norm": 0.26444828510284424, "learning_rate": 7.121194708745368e-06, "loss": 0.5467661023139954, "step": 12446 }, { "epoch": 15.27239263803681, "grad_norm": 0.2785175144672394, "learning_rate": 7.117686833067544e-06, "loss": 0.534498393535614, "step": 12447 }, { "epoch": 15.273619631901841, "grad_norm": 0.26065385341644287, "learning_rate": 7.114179678174762e-06, "loss": 0.5857692956924438, "step": 12448 }, { "epoch": 15.274846625766871, "grad_norm": 0.26521703600883484, "learning_rate": 7.110673244208382e-06, "loss": 0.53392493724823, "step": 12449 }, { "epoch": 15.276073619631902, "grad_norm": 0.2988296151161194, "learning_rate": 7.107167531309753e-06, "loss": 0.7296571731567383, "step": 12450 }, { "epoch": 15.277300613496932, "grad_norm": 0.2645866572856903, "learning_rate": 7.103662539620159e-06, "loss": 0.742912769317627, "step": 12451 }, { "epoch": 15.278527607361964, "grad_norm": 0.23390944302082062, "learning_rate": 7.1001582692808875e-06, "loss": 0.4662191569805145, "step": 12452 }, { "epoch": 15.279754601226994, "grad_norm": 0.25976717472076416, "learning_rate": 7.096654720433182e-06, "loss": 0.44393593072891235, "step": 12453 }, { "epoch": 15.280981595092024, "grad_norm": 0.24470917880535126, "learning_rate": 7.093151893218267e-06, "loss": 0.39451077580451965, "step": 12454 }, { "epoch": 15.282208588957054, "grad_norm": 0.29972025752067566, "learning_rate": 7.089649787777331e-06, "loss": 0.686528205871582, "step": 12455 }, { "epoch": 15.283435582822086, "grad_norm": 0.24614864587783813, "learning_rate": 7.086148404251522e-06, "loss": 0.7289682626724243, "step": 12456 }, { "epoch": 15.284662576687117, "grad_norm": 0.2884972095489502, "learning_rate": 7.082647742781981e-06, "loss": 0.6506470441818237, "step": 12457 }, { "epoch": 15.285889570552147, "grad_norm": 0.2772994637489319, "learning_rate": 7.079147803509808e-06, "loss": 0.6661718487739563, "step": 12458 }, { "epoch": 15.287116564417177, "grad_norm": 0.23600797355175018, "learning_rate": 7.075648586576075e-06, "loss": 0.3590322732925415, "step": 12459 }, { "epoch": 15.28834355828221, "grad_norm": 0.2614605724811554, "learning_rate": 7.072150092121832e-06, "loss": 0.6387047171592712, "step": 12460 }, { "epoch": 15.28957055214724, "grad_norm": 0.2716406583786011, "learning_rate": 7.06865232028808e-06, "loss": 0.6863831877708435, "step": 12461 }, { "epoch": 15.29079754601227, "grad_norm": 0.2483191043138504, "learning_rate": 7.065155271215812e-06, "loss": 0.6548484563827515, "step": 12462 }, { "epoch": 15.2920245398773, "grad_norm": 0.2224285751581192, "learning_rate": 7.061658945045982e-06, "loss": 0.5711959600448608, "step": 12463 }, { "epoch": 15.293251533742332, "grad_norm": 0.2596416175365448, "learning_rate": 7.058163341919519e-06, "loss": 0.5208101272583008, "step": 12464 }, { "epoch": 15.294478527607362, "grad_norm": 0.23812617361545563, "learning_rate": 7.054668461977326e-06, "loss": 0.566799521446228, "step": 12465 }, { "epoch": 15.295705521472392, "grad_norm": 0.28709784150123596, "learning_rate": 7.051174305360259e-06, "loss": 0.5515084862709045, "step": 12466 }, { "epoch": 15.296932515337422, "grad_norm": 0.2881639003753662, "learning_rate": 7.047680872209156e-06, "loss": 0.5578733682632446, "step": 12467 }, { "epoch": 15.298159509202454, "grad_norm": 0.2456391155719757, "learning_rate": 7.044188162664844e-06, "loss": 0.6593813896179199, "step": 12468 }, { "epoch": 15.299386503067485, "grad_norm": 0.2975407838821411, "learning_rate": 7.040696176868103e-06, "loss": 0.733313262462616, "step": 12469 }, { "epoch": 15.300613496932515, "grad_norm": 0.2696363627910614, "learning_rate": 7.037204914959669e-06, "loss": 0.5379957556724548, "step": 12470 }, { "epoch": 15.301840490797545, "grad_norm": 0.2544742226600647, "learning_rate": 7.033714377080275e-06, "loss": 0.5240867137908936, "step": 12471 }, { "epoch": 15.303067484662577, "grad_norm": 0.26447775959968567, "learning_rate": 7.030224563370613e-06, "loss": 0.6768556833267212, "step": 12472 }, { "epoch": 15.304294478527607, "grad_norm": 0.2944006323814392, "learning_rate": 7.026735473971346e-06, "loss": 0.5585380792617798, "step": 12473 }, { "epoch": 15.305521472392638, "grad_norm": 0.267655611038208, "learning_rate": 7.023247109023118e-06, "loss": 0.3932633399963379, "step": 12474 }, { "epoch": 15.30674846625767, "grad_norm": 0.24971066415309906, "learning_rate": 7.019759468666523e-06, "loss": 0.7788875699043274, "step": 12475 }, { "epoch": 15.3079754601227, "grad_norm": 0.27386730909347534, "learning_rate": 7.016272553042139e-06, "loss": 0.4002714157104492, "step": 12476 }, { "epoch": 15.30920245398773, "grad_norm": 0.26032009720802307, "learning_rate": 7.012786362290519e-06, "loss": 0.5659597516059875, "step": 12477 }, { "epoch": 15.31042944785276, "grad_norm": 0.26137495040893555, "learning_rate": 7.009300896552179e-06, "loss": 0.7465043067932129, "step": 12478 }, { "epoch": 15.31165644171779, "grad_norm": 0.2650519013404846, "learning_rate": 7.0058161559676176e-06, "loss": 0.48742222785949707, "step": 12479 }, { "epoch": 15.312883435582823, "grad_norm": 0.2586667239665985, "learning_rate": 7.002332140677278e-06, "loss": 0.37370046973228455, "step": 12480 }, { "epoch": 15.314110429447853, "grad_norm": 0.2845611572265625, "learning_rate": 6.998848850821599e-06, "loss": 0.5731687545776367, "step": 12481 }, { "epoch": 15.315337423312883, "grad_norm": 0.31899553537368774, "learning_rate": 6.99536628654098e-06, "loss": 0.49734991788864136, "step": 12482 }, { "epoch": 15.316564417177915, "grad_norm": 0.24615712463855743, "learning_rate": 6.991884447975796e-06, "loss": 0.6257662773132324, "step": 12483 }, { "epoch": 15.317791411042945, "grad_norm": 0.27293121814727783, "learning_rate": 6.988403335266397e-06, "loss": 0.4951099157333374, "step": 12484 }, { "epoch": 15.319018404907975, "grad_norm": 0.2534116804599762, "learning_rate": 6.9849229485530825e-06, "loss": 0.4871716797351837, "step": 12485 }, { "epoch": 15.320245398773006, "grad_norm": 0.26956841349601746, "learning_rate": 6.981443287976142e-06, "loss": 0.7501716017723083, "step": 12486 }, { "epoch": 15.321472392638038, "grad_norm": 0.27559491991996765, "learning_rate": 6.977964353675834e-06, "loss": 0.4019300043582916, "step": 12487 }, { "epoch": 15.322699386503068, "grad_norm": 0.23980621993541718, "learning_rate": 6.974486145792381e-06, "loss": 0.5225110054016113, "step": 12488 }, { "epoch": 15.323926380368098, "grad_norm": 0.24989408254623413, "learning_rate": 6.971008664465986e-06, "loss": 0.6056481599807739, "step": 12489 }, { "epoch": 15.325153374233128, "grad_norm": 0.26911723613739014, "learning_rate": 6.967531909836799e-06, "loss": 0.6384356021881104, "step": 12490 }, { "epoch": 15.32638036809816, "grad_norm": 0.26392266154289246, "learning_rate": 6.964055882044979e-06, "loss": 0.6798989176750183, "step": 12491 }, { "epoch": 15.32760736196319, "grad_norm": 0.2553144097328186, "learning_rate": 6.960580581230625e-06, "loss": 0.7611202001571655, "step": 12492 }, { "epoch": 15.32883435582822, "grad_norm": 0.24536781013011932, "learning_rate": 6.957106007533826e-06, "loss": 0.7393286228179932, "step": 12493 }, { "epoch": 15.330061349693251, "grad_norm": 0.2742842137813568, "learning_rate": 6.953632161094614e-06, "loss": 0.49721163511276245, "step": 12494 }, { "epoch": 15.331288343558283, "grad_norm": 0.24849607050418854, "learning_rate": 6.950159042053023e-06, "loss": 0.47944217920303345, "step": 12495 }, { "epoch": 15.332515337423313, "grad_norm": 0.24630515277385712, "learning_rate": 6.946686650549039e-06, "loss": 0.5302742719650269, "step": 12496 }, { "epoch": 15.333742331288343, "grad_norm": 0.25350797176361084, "learning_rate": 6.9432149867226265e-06, "loss": 0.7197535037994385, "step": 12497 }, { "epoch": 15.334969325153374, "grad_norm": 0.311278760433197, "learning_rate": 6.939744050713729e-06, "loss": 0.6124275326728821, "step": 12498 }, { "epoch": 15.336196319018406, "grad_norm": 0.24471771717071533, "learning_rate": 6.936273842662228e-06, "loss": 0.6398166418075562, "step": 12499 }, { "epoch": 15.337423312883436, "grad_norm": 0.24864044785499573, "learning_rate": 6.932804362708012e-06, "loss": 0.5842732191085815, "step": 12500 }, { "epoch": 15.338650306748466, "grad_norm": 0.28065770864486694, "learning_rate": 6.929335610990919e-06, "loss": 0.5187753438949585, "step": 12501 }, { "epoch": 15.339877300613496, "grad_norm": 0.265720397233963, "learning_rate": 6.925867587650772e-06, "loss": 0.3704695403575897, "step": 12502 }, { "epoch": 15.341104294478528, "grad_norm": 0.25539538264274597, "learning_rate": 6.92240029282736e-06, "loss": 0.5993624925613403, "step": 12503 }, { "epoch": 15.342331288343559, "grad_norm": 0.3166608512401581, "learning_rate": 6.918933726660426e-06, "loss": 0.5863292813301086, "step": 12504 }, { "epoch": 15.343558282208589, "grad_norm": 0.27248114347457886, "learning_rate": 6.915467889289703e-06, "loss": 0.48468017578125, "step": 12505 }, { "epoch": 15.344785276073619, "grad_norm": 0.2517092525959015, "learning_rate": 6.912002780854893e-06, "loss": 0.5573882460594177, "step": 12506 }, { "epoch": 15.346012269938651, "grad_norm": 0.24341121315956116, "learning_rate": 6.90853840149566e-06, "loss": 0.7055473923683167, "step": 12507 }, { "epoch": 15.347239263803681, "grad_norm": 0.23969247937202454, "learning_rate": 6.905074751351653e-06, "loss": 0.6675792932510376, "step": 12508 }, { "epoch": 15.348466257668711, "grad_norm": 0.26552435755729675, "learning_rate": 6.901611830562468e-06, "loss": 0.6850205659866333, "step": 12509 }, { "epoch": 15.349693251533742, "grad_norm": 0.23289388418197632, "learning_rate": 6.898149639267692e-06, "loss": 0.48989564180374146, "step": 12510 }, { "epoch": 15.350920245398774, "grad_norm": 0.28189679980278015, "learning_rate": 6.894688177606878e-06, "loss": 0.6206682324409485, "step": 12511 }, { "epoch": 15.352147239263804, "grad_norm": 0.2677069306373596, "learning_rate": 6.891227445719542e-06, "loss": 0.9593623876571655, "step": 12512 }, { "epoch": 15.353374233128834, "grad_norm": 0.30117031931877136, "learning_rate": 6.887767443745182e-06, "loss": 0.5291734933853149, "step": 12513 }, { "epoch": 15.354601226993864, "grad_norm": 0.24600772559642792, "learning_rate": 6.884308171823259e-06, "loss": 0.6344950199127197, "step": 12514 }, { "epoch": 15.355828220858896, "grad_norm": 0.22071903944015503, "learning_rate": 6.880849630093203e-06, "loss": 0.3034170866012573, "step": 12515 }, { "epoch": 15.357055214723927, "grad_norm": 0.27256500720977783, "learning_rate": 6.877391818694423e-06, "loss": 0.5320922136306763, "step": 12516 }, { "epoch": 15.358282208588957, "grad_norm": 0.27560916543006897, "learning_rate": 6.873934737766299e-06, "loss": 0.3517828583717346, "step": 12517 }, { "epoch": 15.359509202453987, "grad_norm": 0.3043534755706787, "learning_rate": 6.870478387448162e-06, "loss": 0.5276347994804382, "step": 12518 }, { "epoch": 15.360736196319019, "grad_norm": 0.2827386260032654, "learning_rate": 6.8670227678793335e-06, "loss": 0.7307495474815369, "step": 12519 }, { "epoch": 15.36196319018405, "grad_norm": 0.23512522876262665, "learning_rate": 6.863567879199101e-06, "loss": 0.5739372968673706, "step": 12520 }, { "epoch": 15.36319018404908, "grad_norm": 0.2613798677921295, "learning_rate": 6.860113721546718e-06, "loss": 0.5691842436790466, "step": 12521 }, { "epoch": 15.36441717791411, "grad_norm": 0.2608398497104645, "learning_rate": 6.856660295061423e-06, "loss": 0.4574393630027771, "step": 12522 }, { "epoch": 15.365644171779142, "grad_norm": 0.24815206229686737, "learning_rate": 6.8532075998824e-06, "loss": 0.5742799043655396, "step": 12523 }, { "epoch": 15.366871165644172, "grad_norm": 0.2954810559749603, "learning_rate": 6.849755636148819e-06, "loss": 0.6232582926750183, "step": 12524 }, { "epoch": 15.368098159509202, "grad_norm": 0.28790077567100525, "learning_rate": 6.846304403999826e-06, "loss": 0.6836053133010864, "step": 12525 }, { "epoch": 15.369325153374232, "grad_norm": 0.2533375918865204, "learning_rate": 6.8428539035745225e-06, "loss": 0.39890342950820923, "step": 12526 }, { "epoch": 15.370552147239264, "grad_norm": 0.28059396147727966, "learning_rate": 6.839404135012004e-06, "loss": 0.4988008439540863, "step": 12527 }, { "epoch": 15.371779141104295, "grad_norm": 0.28306013345718384, "learning_rate": 6.8359550984512964e-06, "loss": 0.4321105182170868, "step": 12528 }, { "epoch": 15.373006134969325, "grad_norm": 0.25589072704315186, "learning_rate": 6.832506794031438e-06, "loss": 0.6695328950881958, "step": 12529 }, { "epoch": 15.374233128834355, "grad_norm": 0.31397128105163574, "learning_rate": 6.829059221891412e-06, "loss": 0.7809147834777832, "step": 12530 }, { "epoch": 15.375460122699387, "grad_norm": 0.29350337386131287, "learning_rate": 6.825612382170185e-06, "loss": 0.5746316313743591, "step": 12531 }, { "epoch": 15.376687116564417, "grad_norm": 0.26383811235427856, "learning_rate": 6.822166275006697e-06, "loss": 0.5745065212249756, "step": 12532 }, { "epoch": 15.377914110429447, "grad_norm": 0.2609441578388214, "learning_rate": 6.818720900539832e-06, "loss": 0.7113161087036133, "step": 12533 }, { "epoch": 15.379141104294478, "grad_norm": 0.25603505969047546, "learning_rate": 6.815276258908465e-06, "loss": 0.6104567646980286, "step": 12534 }, { "epoch": 15.38036809815951, "grad_norm": 0.28117433190345764, "learning_rate": 6.8118323502514565e-06, "loss": 0.681586503982544, "step": 12535 }, { "epoch": 15.38159509202454, "grad_norm": 0.2675263285636902, "learning_rate": 6.808389174707619e-06, "loss": 0.5291818380355835, "step": 12536 }, { "epoch": 15.38282208588957, "grad_norm": 0.21553802490234375, "learning_rate": 6.804946732415723e-06, "loss": 0.39942461252212524, "step": 12537 }, { "epoch": 15.3840490797546, "grad_norm": 0.2532212734222412, "learning_rate": 6.801505023514529e-06, "loss": 0.6536940336227417, "step": 12538 }, { "epoch": 15.385276073619632, "grad_norm": 0.2672671973705292, "learning_rate": 6.798064048142763e-06, "loss": 0.6466464996337891, "step": 12539 }, { "epoch": 15.386503067484663, "grad_norm": 0.24771377444267273, "learning_rate": 6.794623806439123e-06, "loss": 0.48526865243911743, "step": 12540 }, { "epoch": 15.387730061349693, "grad_norm": 0.25656282901763916, "learning_rate": 6.791184298542283e-06, "loss": 0.5382064580917358, "step": 12541 }, { "epoch": 15.388957055214725, "grad_norm": 0.2815312445163727, "learning_rate": 6.78774552459086e-06, "loss": 0.5201386213302612, "step": 12542 }, { "epoch": 15.390184049079755, "grad_norm": 0.2698589265346527, "learning_rate": 6.784307484723474e-06, "loss": 0.5123649835586548, "step": 12543 }, { "epoch": 15.391411042944785, "grad_norm": 0.25116801261901855, "learning_rate": 6.7808701790787e-06, "loss": 0.7142969369888306, "step": 12544 }, { "epoch": 15.392638036809815, "grad_norm": 0.2812820374965668, "learning_rate": 6.777433607795086e-06, "loss": 0.6056139469146729, "step": 12545 }, { "epoch": 15.393865030674847, "grad_norm": 0.2514791190624237, "learning_rate": 6.773997771011153e-06, "loss": 0.7683507204055786, "step": 12546 }, { "epoch": 15.395092024539878, "grad_norm": 0.2767903506755829, "learning_rate": 6.770562668865394e-06, "loss": 0.4585188627243042, "step": 12547 }, { "epoch": 15.396319018404908, "grad_norm": 0.2929478585720062, "learning_rate": 6.767128301496253e-06, "loss": 0.8874338865280151, "step": 12548 }, { "epoch": 15.397546012269938, "grad_norm": 0.3159283995628357, "learning_rate": 6.76369466904217e-06, "loss": 0.6559381484985352, "step": 12549 }, { "epoch": 15.39877300613497, "grad_norm": 0.2537139654159546, "learning_rate": 6.760261771641541e-06, "loss": 0.8299261331558228, "step": 12550 }, { "epoch": 15.4, "grad_norm": 0.30347418785095215, "learning_rate": 6.756829609432741e-06, "loss": 0.6255655288696289, "step": 12551 }, { "epoch": 15.40122699386503, "grad_norm": 0.29987385869026184, "learning_rate": 6.753398182554116e-06, "loss": 0.5333330035209656, "step": 12552 }, { "epoch": 15.40245398773006, "grad_norm": 0.28300055861473083, "learning_rate": 6.7499674911439605e-06, "loss": 0.3547270596027374, "step": 12553 }, { "epoch": 15.403680981595093, "grad_norm": 0.2414933741092682, "learning_rate": 6.746537535340563e-06, "loss": 0.7045456767082214, "step": 12554 }, { "epoch": 15.404907975460123, "grad_norm": 0.329489141702652, "learning_rate": 6.7431083152821785e-06, "loss": 0.7958903908729553, "step": 12555 }, { "epoch": 15.406134969325153, "grad_norm": 0.26023826003074646, "learning_rate": 6.739679831107029e-06, "loss": 0.4713357090950012, "step": 12556 }, { "epoch": 15.407361963190183, "grad_norm": 0.2851620316505432, "learning_rate": 6.736252082953307e-06, "loss": 0.6867093443870544, "step": 12557 }, { "epoch": 15.408588957055215, "grad_norm": 0.2713218927383423, "learning_rate": 6.73282507095917e-06, "loss": 0.5392357110977173, "step": 12558 }, { "epoch": 15.409815950920246, "grad_norm": 0.2720114588737488, "learning_rate": 6.729398795262757e-06, "loss": 0.7809208631515503, "step": 12559 }, { "epoch": 15.411042944785276, "grad_norm": 0.2506328225135803, "learning_rate": 6.725973256002166e-06, "loss": 0.6383311748504639, "step": 12560 }, { "epoch": 15.412269938650306, "grad_norm": 0.270300954580307, "learning_rate": 6.722548453315483e-06, "loss": 0.5278007984161377, "step": 12561 }, { "epoch": 15.413496932515338, "grad_norm": 0.30816590785980225, "learning_rate": 6.719124387340736e-06, "loss": 0.768875002861023, "step": 12562 }, { "epoch": 15.414723926380368, "grad_norm": 0.24907808005809784, "learning_rate": 6.715701058215948e-06, "loss": 0.6712051630020142, "step": 12563 }, { "epoch": 15.415950920245399, "grad_norm": 0.30701398849487305, "learning_rate": 6.7122784660791e-06, "loss": 0.7916603088378906, "step": 12564 }, { "epoch": 15.417177914110429, "grad_norm": 0.25365832448005676, "learning_rate": 6.7088566110681486e-06, "loss": 0.6492905616760254, "step": 12565 }, { "epoch": 15.41840490797546, "grad_norm": 0.2983369827270508, "learning_rate": 6.705435493321027e-06, "loss": 0.7283387184143066, "step": 12566 }, { "epoch": 15.419631901840491, "grad_norm": 0.3014501929283142, "learning_rate": 6.702015112975615e-06, "loss": 0.4557858407497406, "step": 12567 }, { "epoch": 15.420858895705521, "grad_norm": 0.2517406940460205, "learning_rate": 6.698595470169786e-06, "loss": 0.3754396140575409, "step": 12568 }, { "epoch": 15.422085889570551, "grad_norm": 0.2982907295227051, "learning_rate": 6.695176565041378e-06, "loss": 0.8569538593292236, "step": 12569 }, { "epoch": 15.423312883435583, "grad_norm": 0.27103886008262634, "learning_rate": 6.691758397728196e-06, "loss": 0.6849044561386108, "step": 12570 }, { "epoch": 15.424539877300614, "grad_norm": 0.26741135120391846, "learning_rate": 6.68834096836802e-06, "loss": 0.6180466413497925, "step": 12571 }, { "epoch": 15.425766871165644, "grad_norm": 0.2696172595024109, "learning_rate": 6.6849242770985896e-06, "loss": 0.6038616895675659, "step": 12572 }, { "epoch": 15.426993865030674, "grad_norm": 0.30007290840148926, "learning_rate": 6.681508324057622e-06, "loss": 0.5063232183456421, "step": 12573 }, { "epoch": 15.428220858895706, "grad_norm": 0.24383045732975006, "learning_rate": 6.678093109382813e-06, "loss": 0.45700711011886597, "step": 12574 }, { "epoch": 15.429447852760736, "grad_norm": 0.24160292744636536, "learning_rate": 6.674678633211811e-06, "loss": 0.4915914237499237, "step": 12575 }, { "epoch": 15.430674846625767, "grad_norm": 0.24942222237586975, "learning_rate": 6.671264895682256e-06, "loss": 0.6497023105621338, "step": 12576 }, { "epoch": 15.431901840490797, "grad_norm": 0.24977879226207733, "learning_rate": 6.667851896931731e-06, "loss": 0.5954406261444092, "step": 12577 }, { "epoch": 15.433128834355829, "grad_norm": 0.2335703819990158, "learning_rate": 6.6644396370978024e-06, "loss": 0.5549836754798889, "step": 12578 }, { "epoch": 15.434355828220859, "grad_norm": 0.24130719900131226, "learning_rate": 6.661028116318025e-06, "loss": 0.6447414755821228, "step": 12579 }, { "epoch": 15.43558282208589, "grad_norm": 0.25847041606903076, "learning_rate": 6.6576173347299095e-06, "loss": 0.660491943359375, "step": 12580 }, { "epoch": 15.43680981595092, "grad_norm": 0.2749135196208954, "learning_rate": 6.654207292470915e-06, "loss": 0.4960040748119354, "step": 12581 }, { "epoch": 15.438036809815952, "grad_norm": 0.2635905146598816, "learning_rate": 6.650797989678503e-06, "loss": 0.7167783975601196, "step": 12582 }, { "epoch": 15.439263803680982, "grad_norm": 0.28742051124572754, "learning_rate": 6.647389426490089e-06, "loss": 0.5971431732177734, "step": 12583 }, { "epoch": 15.440490797546012, "grad_norm": 0.2324191927909851, "learning_rate": 6.6439816030430645e-06, "loss": 0.5271351337432861, "step": 12584 }, { "epoch": 15.441717791411042, "grad_norm": 0.2999553680419922, "learning_rate": 6.640574519474796e-06, "loss": 0.656105637550354, "step": 12585 }, { "epoch": 15.442944785276074, "grad_norm": 0.2163992077112198, "learning_rate": 6.637168175922598e-06, "loss": 0.4233227074146271, "step": 12586 }, { "epoch": 15.444171779141104, "grad_norm": 0.2835901379585266, "learning_rate": 6.633762572523777e-06, "loss": 0.5208154916763306, "step": 12587 }, { "epoch": 15.445398773006135, "grad_norm": 0.25898846983909607, "learning_rate": 6.630357709415608e-06, "loss": 0.8300080299377441, "step": 12588 }, { "epoch": 15.446625766871165, "grad_norm": 0.26736724376678467, "learning_rate": 6.626953586735324e-06, "loss": 0.6677039861679077, "step": 12589 }, { "epoch": 15.447852760736197, "grad_norm": 0.22129201889038086, "learning_rate": 6.623550204620149e-06, "loss": 0.49092215299606323, "step": 12590 }, { "epoch": 15.449079754601227, "grad_norm": 0.2709410488605499, "learning_rate": 6.620147563207246e-06, "loss": 0.6348648071289062, "step": 12591 }, { "epoch": 15.450306748466257, "grad_norm": 0.2500791847705841, "learning_rate": 6.616745662633775e-06, "loss": 0.6399420499801636, "step": 12592 }, { "epoch": 15.451533742331288, "grad_norm": 0.2453768402338028, "learning_rate": 6.613344503036856e-06, "loss": 0.6013742685317993, "step": 12593 }, { "epoch": 15.45276073619632, "grad_norm": 0.2718251645565033, "learning_rate": 6.609944084553576e-06, "loss": 0.7351937890052795, "step": 12594 }, { "epoch": 15.45398773006135, "grad_norm": 0.2745310962200165, "learning_rate": 6.606544407321011e-06, "loss": 0.6231175661087036, "step": 12595 }, { "epoch": 15.45521472392638, "grad_norm": 0.2998960614204407, "learning_rate": 6.603145471476171e-06, "loss": 0.5784717798233032, "step": 12596 }, { "epoch": 15.45644171779141, "grad_norm": 0.27124258875846863, "learning_rate": 6.599747277156071e-06, "loss": 0.755896806716919, "step": 12597 }, { "epoch": 15.457668711656442, "grad_norm": 0.26012173295021057, "learning_rate": 6.596349824497678e-06, "loss": 0.5613969564437866, "step": 12598 }, { "epoch": 15.458895705521472, "grad_norm": 0.26763930916786194, "learning_rate": 6.592953113637932e-06, "loss": 0.7488473653793335, "step": 12599 }, { "epoch": 15.460122699386503, "grad_norm": 0.233550027012825, "learning_rate": 6.5895571447137565e-06, "loss": 0.538532018661499, "step": 12600 }, { "epoch": 15.461349693251535, "grad_norm": 0.2664233148097992, "learning_rate": 6.5861619178620106e-06, "loss": 0.5001634955406189, "step": 12601 }, { "epoch": 15.462576687116565, "grad_norm": 0.29537317156791687, "learning_rate": 6.582767433219567e-06, "loss": 0.6215505003929138, "step": 12602 }, { "epoch": 15.463803680981595, "grad_norm": 0.24055792391300201, "learning_rate": 6.579373690923241e-06, "loss": 0.5307033061981201, "step": 12603 }, { "epoch": 15.465030674846625, "grad_norm": 0.2440069019794464, "learning_rate": 6.575980691109829e-06, "loss": 0.5860209465026855, "step": 12604 }, { "epoch": 15.466257668711656, "grad_norm": 0.27054834365844727, "learning_rate": 6.5725884339160825e-06, "loss": 0.5757009983062744, "step": 12605 }, { "epoch": 15.467484662576688, "grad_norm": 0.30019015073776245, "learning_rate": 6.56919691947874e-06, "loss": 0.8380770087242126, "step": 12606 }, { "epoch": 15.468711656441718, "grad_norm": 0.24672161042690277, "learning_rate": 6.565806147934503e-06, "loss": 0.622115969657898, "step": 12607 }, { "epoch": 15.469938650306748, "grad_norm": 0.27032244205474854, "learning_rate": 6.562416119420045e-06, "loss": 0.4992094039916992, "step": 12608 }, { "epoch": 15.47116564417178, "grad_norm": 0.22421543300151825, "learning_rate": 6.559026834072013e-06, "loss": 0.501268744468689, "step": 12609 }, { "epoch": 15.47239263803681, "grad_norm": 0.2747969627380371, "learning_rate": 6.5556382920270084e-06, "loss": 0.4403133988380432, "step": 12610 }, { "epoch": 15.47361963190184, "grad_norm": 0.2864725887775421, "learning_rate": 6.552250493421619e-06, "loss": 0.789664626121521, "step": 12611 }, { "epoch": 15.47484662576687, "grad_norm": 0.27285897731781006, "learning_rate": 6.548863438392397e-06, "loss": 0.6922789812088013, "step": 12612 }, { "epoch": 15.476073619631903, "grad_norm": 0.3092503547668457, "learning_rate": 6.545477127075866e-06, "loss": 0.8459310531616211, "step": 12613 }, { "epoch": 15.477300613496933, "grad_norm": 0.25392946600914, "learning_rate": 6.542091559608526e-06, "loss": 0.5711360573768616, "step": 12614 }, { "epoch": 15.478527607361963, "grad_norm": 0.2597408592700958, "learning_rate": 6.538706736126823e-06, "loss": 0.679840624332428, "step": 12615 }, { "epoch": 15.479754601226993, "grad_norm": 0.2728382349014282, "learning_rate": 6.535322656767198e-06, "loss": 0.4219786524772644, "step": 12616 }, { "epoch": 15.480981595092025, "grad_norm": 0.2699000835418701, "learning_rate": 6.531939321666056e-06, "loss": 0.6122294068336487, "step": 12617 }, { "epoch": 15.482208588957056, "grad_norm": 0.25024235248565674, "learning_rate": 6.5285567309597646e-06, "loss": 0.6284664869308472, "step": 12618 }, { "epoch": 15.483435582822086, "grad_norm": 0.30625978112220764, "learning_rate": 6.525174884784679e-06, "loss": 0.6471744775772095, "step": 12619 }, { "epoch": 15.484662576687116, "grad_norm": 0.2805422246456146, "learning_rate": 6.521793783277094e-06, "loss": 0.8565328121185303, "step": 12620 }, { "epoch": 15.485889570552148, "grad_norm": 0.2846396267414093, "learning_rate": 6.5184134265733e-06, "loss": 0.7169187068939209, "step": 12621 }, { "epoch": 15.487116564417178, "grad_norm": 0.283562570810318, "learning_rate": 6.515033814809549e-06, "loss": 0.5580624938011169, "step": 12622 }, { "epoch": 15.488343558282208, "grad_norm": 0.2587977647781372, "learning_rate": 6.5116549481220645e-06, "loss": 0.5719529986381531, "step": 12623 }, { "epoch": 15.489570552147239, "grad_norm": 0.2563442289829254, "learning_rate": 6.508276826647036e-06, "loss": 0.605044960975647, "step": 12624 }, { "epoch": 15.49079754601227, "grad_norm": 0.2589121162891388, "learning_rate": 6.504899450520633e-06, "loss": 0.7200753688812256, "step": 12625 }, { "epoch": 15.4920245398773, "grad_norm": 0.27876758575439453, "learning_rate": 6.50152281987898e-06, "loss": 0.45106688141822815, "step": 12626 }, { "epoch": 15.493251533742331, "grad_norm": 0.28381988406181335, "learning_rate": 6.498146934858185e-06, "loss": 0.7914415597915649, "step": 12627 }, { "epoch": 15.494478527607361, "grad_norm": 0.2638866901397705, "learning_rate": 6.494771795594326e-06, "loss": 0.6330105066299438, "step": 12628 }, { "epoch": 15.495705521472393, "grad_norm": 0.2616586983203888, "learning_rate": 6.4913974022234286e-06, "loss": 0.7554613351821899, "step": 12629 }, { "epoch": 15.496932515337424, "grad_norm": 0.22684821486473083, "learning_rate": 6.488023754881514e-06, "loss": 0.5339817404747009, "step": 12630 }, { "epoch": 15.498159509202454, "grad_norm": 0.27149632573127747, "learning_rate": 6.484650853704563e-06, "loss": 0.7211755514144897, "step": 12631 }, { "epoch": 15.499386503067484, "grad_norm": 0.28704115748405457, "learning_rate": 6.48127869882853e-06, "loss": 0.8756058216094971, "step": 12632 }, { "epoch": 15.500613496932516, "grad_norm": 0.26345208287239075, "learning_rate": 6.477907290389343e-06, "loss": 0.608370304107666, "step": 12633 }, { "epoch": 15.501840490797546, "grad_norm": 0.27768564224243164, "learning_rate": 6.474536628522878e-06, "loss": 0.6496096849441528, "step": 12634 }, { "epoch": 15.503067484662576, "grad_norm": 0.26063260436058044, "learning_rate": 6.471166713365007e-06, "loss": 0.758327066898346, "step": 12635 }, { "epoch": 15.504294478527607, "grad_norm": 0.25481659173965454, "learning_rate": 6.467797545051557e-06, "loss": 0.6952396631240845, "step": 12636 }, { "epoch": 15.505521472392639, "grad_norm": 0.3018667995929718, "learning_rate": 6.464429123718335e-06, "loss": 0.5571727752685547, "step": 12637 }, { "epoch": 15.506748466257669, "grad_norm": 0.30636975169181824, "learning_rate": 6.461061449501118e-06, "loss": 0.7034263610839844, "step": 12638 }, { "epoch": 15.5079754601227, "grad_norm": 0.2439998984336853, "learning_rate": 6.457694522535629e-06, "loss": 0.7352072596549988, "step": 12639 }, { "epoch": 15.50920245398773, "grad_norm": 0.23599617183208466, "learning_rate": 6.454328342957591e-06, "loss": 0.6281602382659912, "step": 12640 }, { "epoch": 15.510429447852761, "grad_norm": 0.265219122171402, "learning_rate": 6.450962910902686e-06, "loss": 0.6596067547798157, "step": 12641 }, { "epoch": 15.511656441717792, "grad_norm": 0.2537754774093628, "learning_rate": 6.44759822650656e-06, "loss": 0.43383145332336426, "step": 12642 }, { "epoch": 15.512883435582822, "grad_norm": 0.27032986283302307, "learning_rate": 6.444234289904843e-06, "loss": 0.8178550004959106, "step": 12643 }, { "epoch": 15.514110429447852, "grad_norm": 0.30424201488494873, "learning_rate": 6.440871101233112e-06, "loss": 0.6170965433120728, "step": 12644 }, { "epoch": 15.515337423312884, "grad_norm": 0.3042619824409485, "learning_rate": 6.437508660626928e-06, "loss": 0.6696580052375793, "step": 12645 }, { "epoch": 15.516564417177914, "grad_norm": 0.2640962302684784, "learning_rate": 6.4341469682218355e-06, "loss": 0.6745272874832153, "step": 12646 }, { "epoch": 15.517791411042944, "grad_norm": 0.2485523223876953, "learning_rate": 6.430786024153335e-06, "loss": 0.3476497232913971, "step": 12647 }, { "epoch": 15.519018404907975, "grad_norm": 0.30223768949508667, "learning_rate": 6.42742582855688e-06, "loss": 0.403568297624588, "step": 12648 }, { "epoch": 15.520245398773007, "grad_norm": 0.26073768734931946, "learning_rate": 6.4240663815679195e-06, "loss": 0.3693571090698242, "step": 12649 }, { "epoch": 15.521472392638037, "grad_norm": 0.24516205489635468, "learning_rate": 6.4207076833218634e-06, "loss": 0.49033188819885254, "step": 12650 }, { "epoch": 15.522699386503067, "grad_norm": 0.2745336592197418, "learning_rate": 6.417349733954092e-06, "loss": 0.5750452876091003, "step": 12651 }, { "epoch": 15.523926380368097, "grad_norm": 0.27477148175239563, "learning_rate": 6.413992533599952e-06, "loss": 0.523051917552948, "step": 12652 }, { "epoch": 15.52515337423313, "grad_norm": 0.23432229459285736, "learning_rate": 6.410636082394772e-06, "loss": 0.3619841933250427, "step": 12653 }, { "epoch": 15.52638036809816, "grad_norm": 0.2662513256072998, "learning_rate": 6.4072803804738235e-06, "loss": 0.701650083065033, "step": 12654 }, { "epoch": 15.52760736196319, "grad_norm": 0.265180766582489, "learning_rate": 6.403925427972377e-06, "loss": 0.41569605469703674, "step": 12655 }, { "epoch": 15.52883435582822, "grad_norm": 0.3280431628227234, "learning_rate": 6.400571225025659e-06, "loss": 0.6278194189071655, "step": 12656 }, { "epoch": 15.530061349693252, "grad_norm": 0.2727596163749695, "learning_rate": 6.3972177717688684e-06, "loss": 0.5526400804519653, "step": 12657 }, { "epoch": 15.531288343558282, "grad_norm": 0.3113519251346588, "learning_rate": 6.3938650683371765e-06, "loss": 0.7130610942840576, "step": 12658 }, { "epoch": 15.532515337423312, "grad_norm": 0.2814340889453888, "learning_rate": 6.3905131148657125e-06, "loss": 0.7124536037445068, "step": 12659 }, { "epoch": 15.533742331288344, "grad_norm": 0.24531453847885132, "learning_rate": 6.387161911489589e-06, "loss": 0.43735820055007935, "step": 12660 }, { "epoch": 15.534969325153375, "grad_norm": 0.27714839577674866, "learning_rate": 6.383811458343883e-06, "loss": 0.5401633977890015, "step": 12661 }, { "epoch": 15.536196319018405, "grad_norm": 0.28669098019599915, "learning_rate": 6.380461755563641e-06, "loss": 0.5255818367004395, "step": 12662 }, { "epoch": 15.537423312883435, "grad_norm": 0.2787538468837738, "learning_rate": 6.377112803283891e-06, "loss": 0.6990307569503784, "step": 12663 }, { "epoch": 15.538650306748465, "grad_norm": 0.2794278562068939, "learning_rate": 6.373764601639601e-06, "loss": 0.8240551948547363, "step": 12664 }, { "epoch": 15.539877300613497, "grad_norm": 0.2485922873020172, "learning_rate": 6.370417150765734e-06, "loss": 0.6459782123565674, "step": 12665 }, { "epoch": 15.541104294478528, "grad_norm": 0.282296746969223, "learning_rate": 6.367070450797222e-06, "loss": 0.5041948556900024, "step": 12666 }, { "epoch": 15.542331288343558, "grad_norm": 0.2715533673763275, "learning_rate": 6.363724501868956e-06, "loss": 0.623053789138794, "step": 12667 }, { "epoch": 15.54355828220859, "grad_norm": 0.2684827744960785, "learning_rate": 6.360379304115802e-06, "loss": 0.7178043127059937, "step": 12668 }, { "epoch": 15.54478527607362, "grad_norm": 0.2553316056728363, "learning_rate": 6.3570348576725975e-06, "loss": 0.544590950012207, "step": 12669 }, { "epoch": 15.54601226993865, "grad_norm": 0.2666205167770386, "learning_rate": 6.3536911626741465e-06, "loss": 0.5822165012359619, "step": 12670 }, { "epoch": 15.54723926380368, "grad_norm": 0.32290658354759216, "learning_rate": 6.3503482192552226e-06, "loss": 0.5823752880096436, "step": 12671 }, { "epoch": 15.548466257668712, "grad_norm": 0.2711541950702667, "learning_rate": 6.347006027550581e-06, "loss": 0.5875043869018555, "step": 12672 }, { "epoch": 15.549693251533743, "grad_norm": 0.26674753427505493, "learning_rate": 6.343664587694917e-06, "loss": 0.7252727746963501, "step": 12673 }, { "epoch": 15.550920245398773, "grad_norm": 0.2932809889316559, "learning_rate": 6.340323899822925e-06, "loss": 0.7301102876663208, "step": 12674 }, { "epoch": 15.552147239263803, "grad_norm": 0.26098281145095825, "learning_rate": 6.336983964069257e-06, "loss": 0.6959563493728638, "step": 12675 }, { "epoch": 15.553374233128835, "grad_norm": 0.30181950330734253, "learning_rate": 6.333644780568537e-06, "loss": 0.4943467378616333, "step": 12676 }, { "epoch": 15.554601226993865, "grad_norm": 0.2543012797832489, "learning_rate": 6.330306349455364e-06, "loss": 0.47937870025634766, "step": 12677 }, { "epoch": 15.555828220858896, "grad_norm": 0.26672592759132385, "learning_rate": 6.3269686708642885e-06, "loss": 0.600297212600708, "step": 12678 }, { "epoch": 15.557055214723926, "grad_norm": 0.31654518842697144, "learning_rate": 6.323631744929851e-06, "loss": 0.4493817985057831, "step": 12679 }, { "epoch": 15.558282208588958, "grad_norm": 0.26419544219970703, "learning_rate": 6.320295571786547e-06, "loss": 0.6081829071044922, "step": 12680 }, { "epoch": 15.559509202453988, "grad_norm": 0.265716552734375, "learning_rate": 6.316960151568854e-06, "loss": 0.4897388517856598, "step": 12681 }, { "epoch": 15.560736196319018, "grad_norm": 0.2600662112236023, "learning_rate": 6.313625484411221e-06, "loss": 0.7274485230445862, "step": 12682 }, { "epoch": 15.561963190184048, "grad_norm": 0.2719239592552185, "learning_rate": 6.310291570448037e-06, "loss": 0.6163476705551147, "step": 12683 }, { "epoch": 15.56319018404908, "grad_norm": 0.2887811064720154, "learning_rate": 6.306958409813699e-06, "loss": 0.4645140469074249, "step": 12684 }, { "epoch": 15.56441717791411, "grad_norm": 0.25993403792381287, "learning_rate": 6.303626002642554e-06, "loss": 0.5732084512710571, "step": 12685 }, { "epoch": 15.565644171779141, "grad_norm": 0.24066682159900665, "learning_rate": 6.300294349068919e-06, "loss": 0.48813140392303467, "step": 12686 }, { "epoch": 15.566871165644171, "grad_norm": 0.30281075835227966, "learning_rate": 6.2969634492270945e-06, "loss": 0.49774619936943054, "step": 12687 }, { "epoch": 15.568098159509203, "grad_norm": 0.2748267948627472, "learning_rate": 6.29363330325132e-06, "loss": 0.7313203811645508, "step": 12688 }, { "epoch": 15.569325153374233, "grad_norm": 0.30050960183143616, "learning_rate": 6.290303911275838e-06, "loss": 0.5709744691848755, "step": 12689 }, { "epoch": 15.570552147239264, "grad_norm": 0.3453064262866974, "learning_rate": 6.286975273434837e-06, "loss": 0.6528934836387634, "step": 12690 }, { "epoch": 15.571779141104294, "grad_norm": 0.28385472297668457, "learning_rate": 6.283647389862504e-06, "loss": 0.6504421830177307, "step": 12691 }, { "epoch": 15.573006134969326, "grad_norm": 0.24294598400592804, "learning_rate": 6.280320260692957e-06, "loss": 0.47289299964904785, "step": 12692 }, { "epoch": 15.574233128834356, "grad_norm": 0.24132931232452393, "learning_rate": 6.276993886060309e-06, "loss": 0.605129599571228, "step": 12693 }, { "epoch": 15.575460122699386, "grad_norm": 0.3317714035511017, "learning_rate": 6.273668266098639e-06, "loss": 0.71133953332901, "step": 12694 }, { "epoch": 15.576687116564417, "grad_norm": 0.7336897253990173, "learning_rate": 6.2703434009419945e-06, "loss": 0.4194180369377136, "step": 12695 }, { "epoch": 15.577914110429449, "grad_norm": 0.28295570611953735, "learning_rate": 6.267019290724393e-06, "loss": 0.3858744204044342, "step": 12696 }, { "epoch": 15.579141104294479, "grad_norm": 0.2757376432418823, "learning_rate": 6.263695935579808e-06, "loss": 0.6008862853050232, "step": 12697 }, { "epoch": 15.580368098159509, "grad_norm": 0.24856464564800262, "learning_rate": 6.260373335642206e-06, "loss": 0.5923763513565063, "step": 12698 }, { "epoch": 15.58159509202454, "grad_norm": 0.24088969826698303, "learning_rate": 6.257051491045504e-06, "loss": 0.6753265261650085, "step": 12699 }, { "epoch": 15.582822085889571, "grad_norm": 0.2755751609802246, "learning_rate": 6.253730401923602e-06, "loss": 0.4082585275173187, "step": 12700 }, { "epoch": 15.584049079754601, "grad_norm": 0.2690596580505371, "learning_rate": 6.250410068410367e-06, "loss": 0.6096552610397339, "step": 12701 }, { "epoch": 15.585276073619632, "grad_norm": 0.28138870000839233, "learning_rate": 6.247090490639621e-06, "loss": 0.426154762506485, "step": 12702 }, { "epoch": 15.586503067484662, "grad_norm": 0.25349128246307373, "learning_rate": 6.243771668745172e-06, "loss": 0.6221270561218262, "step": 12703 }, { "epoch": 15.587730061349694, "grad_norm": 0.29831984639167786, "learning_rate": 6.240453602860791e-06, "loss": 0.7546730041503906, "step": 12704 }, { "epoch": 15.588957055214724, "grad_norm": 0.26515695452690125, "learning_rate": 6.237136293120221e-06, "loss": 0.771337628364563, "step": 12705 }, { "epoch": 15.590184049079754, "grad_norm": 0.2788117825984955, "learning_rate": 6.233819739657185e-06, "loss": 0.7097643613815308, "step": 12706 }, { "epoch": 15.591411042944785, "grad_norm": 0.2841852307319641, "learning_rate": 6.230503942605342e-06, "loss": 0.5449917316436768, "step": 12707 }, { "epoch": 15.592638036809817, "grad_norm": 0.26209187507629395, "learning_rate": 6.2271889020983524e-06, "loss": 0.6927297115325928, "step": 12708 }, { "epoch": 15.593865030674847, "grad_norm": 0.2738935351371765, "learning_rate": 6.2238746182698375e-06, "loss": 0.6987613439559937, "step": 12709 }, { "epoch": 15.595092024539877, "grad_norm": 0.25045594573020935, "learning_rate": 6.2205610912533855e-06, "loss": 0.6584089994430542, "step": 12710 }, { "epoch": 15.596319018404907, "grad_norm": 0.31742873787879944, "learning_rate": 6.217248321182562e-06, "loss": 0.532676637172699, "step": 12711 }, { "epoch": 15.59754601226994, "grad_norm": 0.25357648730278015, "learning_rate": 6.213936308190876e-06, "loss": 0.6498099565505981, "step": 12712 }, { "epoch": 15.59877300613497, "grad_norm": 0.2564086318016052, "learning_rate": 6.210625052411842e-06, "loss": 0.5621019005775452, "step": 12713 }, { "epoch": 15.6, "grad_norm": 0.2674868106842041, "learning_rate": 6.207314553978927e-06, "loss": 0.4440805912017822, "step": 12714 }, { "epoch": 15.60122699386503, "grad_norm": 0.2734711468219757, "learning_rate": 6.204004813025568e-06, "loss": 0.6888058185577393, "step": 12715 }, { "epoch": 15.602453987730062, "grad_norm": 0.2680978775024414, "learning_rate": 6.2006958296851605e-06, "loss": 0.5323243141174316, "step": 12716 }, { "epoch": 15.603680981595092, "grad_norm": 0.26495790481567383, "learning_rate": 6.197387604091087e-06, "loss": 0.4646916687488556, "step": 12717 }, { "epoch": 15.604907975460122, "grad_norm": 0.25493520498275757, "learning_rate": 6.194080136376693e-06, "loss": 0.42554816603660583, "step": 12718 }, { "epoch": 15.606134969325154, "grad_norm": 0.2934620678424835, "learning_rate": 6.190773426675292e-06, "loss": 0.6788159012794495, "step": 12719 }, { "epoch": 15.607361963190185, "grad_norm": 0.25201570987701416, "learning_rate": 6.1874674751201765e-06, "loss": 0.4406859874725342, "step": 12720 }, { "epoch": 15.608588957055215, "grad_norm": 0.29678988456726074, "learning_rate": 6.184162281844586e-06, "loss": 0.6580303311347961, "step": 12721 }, { "epoch": 15.609815950920245, "grad_norm": 0.3087843358516693, "learning_rate": 6.180857846981747e-06, "loss": 0.6154788136482239, "step": 12722 }, { "epoch": 15.611042944785275, "grad_norm": 0.24942359328269958, "learning_rate": 6.1775541706648534e-06, "loss": 0.51401287317276, "step": 12723 }, { "epoch": 15.612269938650307, "grad_norm": 0.29716578125953674, "learning_rate": 6.174251253027072e-06, "loss": 0.8549864292144775, "step": 12724 }, { "epoch": 15.613496932515337, "grad_norm": 0.3055764138698578, "learning_rate": 6.170949094201534e-06, "loss": 0.6854010820388794, "step": 12725 }, { "epoch": 15.614723926380368, "grad_norm": 0.26130056381225586, "learning_rate": 6.167647694321329e-06, "loss": 0.60771644115448, "step": 12726 }, { "epoch": 15.6159509202454, "grad_norm": 0.27037137746810913, "learning_rate": 6.164347053519534e-06, "loss": 0.49860870838165283, "step": 12727 }, { "epoch": 15.61717791411043, "grad_norm": 0.2778491675853729, "learning_rate": 6.161047171929191e-06, "loss": 0.6923577785491943, "step": 12728 }, { "epoch": 15.61840490797546, "grad_norm": 0.2481425702571869, "learning_rate": 6.157748049683301e-06, "loss": 0.4492827355861664, "step": 12729 }, { "epoch": 15.61963190184049, "grad_norm": 0.28364211320877075, "learning_rate": 6.154449686914859e-06, "loss": 0.5787389874458313, "step": 12730 }, { "epoch": 15.62085889570552, "grad_norm": 0.2554014325141907, "learning_rate": 6.151152083756792e-06, "loss": 0.6676608920097351, "step": 12731 }, { "epoch": 15.622085889570553, "grad_norm": 0.3010959327220917, "learning_rate": 6.147855240342026e-06, "loss": 0.7275147438049316, "step": 12732 }, { "epoch": 15.623312883435583, "grad_norm": 0.2923167049884796, "learning_rate": 6.144559156803448e-06, "loss": 0.5282667875289917, "step": 12733 }, { "epoch": 15.624539877300613, "grad_norm": 0.32053256034851074, "learning_rate": 6.1412638332739105e-06, "loss": 0.6313478946685791, "step": 12734 }, { "epoch": 15.625766871165645, "grad_norm": 0.2992931604385376, "learning_rate": 6.137969269886243e-06, "loss": 0.5853064060211182, "step": 12735 }, { "epoch": 15.626993865030675, "grad_norm": 0.31652650237083435, "learning_rate": 6.134675466773237e-06, "loss": 0.568359375, "step": 12736 }, { "epoch": 15.628220858895705, "grad_norm": 0.2889907658100128, "learning_rate": 6.1313824240676585e-06, "loss": 0.6058427095413208, "step": 12737 }, { "epoch": 15.629447852760736, "grad_norm": 0.23003803193569183, "learning_rate": 6.12809014190224e-06, "loss": 0.5155072212219238, "step": 12738 }, { "epoch": 15.630674846625768, "grad_norm": 0.3223899006843567, "learning_rate": 6.1247986204096885e-06, "loss": 0.421054482460022, "step": 12739 }, { "epoch": 15.631901840490798, "grad_norm": 0.30444830656051636, "learning_rate": 6.121507859722667e-06, "loss": 0.4146268963813782, "step": 12740 }, { "epoch": 15.633128834355828, "grad_norm": 0.27449312806129456, "learning_rate": 6.118217859973818e-06, "loss": 0.7115697264671326, "step": 12741 }, { "epoch": 15.634355828220858, "grad_norm": 0.26842570304870605, "learning_rate": 6.114928621295754e-06, "loss": 0.5848277807235718, "step": 12742 }, { "epoch": 15.63558282208589, "grad_norm": 0.2924489676952362, "learning_rate": 6.111640143821057e-06, "loss": 0.7480250597000122, "step": 12743 }, { "epoch": 15.63680981595092, "grad_norm": 0.2914598882198334, "learning_rate": 6.108352427682282e-06, "loss": 0.46164244413375854, "step": 12744 }, { "epoch": 15.63803680981595, "grad_norm": 0.2786026895046234, "learning_rate": 6.105065473011931e-06, "loss": 0.5653871297836304, "step": 12745 }, { "epoch": 15.639263803680981, "grad_norm": 0.3047463893890381, "learning_rate": 6.101779279942502e-06, "loss": 0.6735193133354187, "step": 12746 }, { "epoch": 15.640490797546013, "grad_norm": 0.25980091094970703, "learning_rate": 6.098493848606452e-06, "loss": 0.6331287622451782, "step": 12747 }, { "epoch": 15.641717791411043, "grad_norm": 0.2840439975261688, "learning_rate": 6.095209179136202e-06, "loss": 0.6353942155838013, "step": 12748 }, { "epoch": 15.642944785276073, "grad_norm": 0.3176382780075073, "learning_rate": 6.091925271664156e-06, "loss": 0.4796043634414673, "step": 12749 }, { "epoch": 15.644171779141104, "grad_norm": 0.2933051288127899, "learning_rate": 6.08864212632268e-06, "loss": 0.4882825016975403, "step": 12750 }, { "epoch": 15.645398773006136, "grad_norm": 0.2660214900970459, "learning_rate": 6.085359743244096e-06, "loss": 0.6790878772735596, "step": 12751 }, { "epoch": 15.646625766871166, "grad_norm": 0.259329617023468, "learning_rate": 6.082078122560716e-06, "loss": 0.5935578942298889, "step": 12752 }, { "epoch": 15.647852760736196, "grad_norm": 0.27024829387664795, "learning_rate": 6.078797264404809e-06, "loss": 0.52534019947052, "step": 12753 }, { "epoch": 15.649079754601226, "grad_norm": 0.2573007047176361, "learning_rate": 6.075517168908621e-06, "loss": 0.410253643989563, "step": 12754 }, { "epoch": 15.650306748466258, "grad_norm": 0.2522570490837097, "learning_rate": 6.072237836204367e-06, "loss": 0.8312133550643921, "step": 12755 }, { "epoch": 15.651533742331289, "grad_norm": 0.25257229804992676, "learning_rate": 6.0689592664242094e-06, "loss": 0.6202942728996277, "step": 12756 }, { "epoch": 15.652760736196319, "grad_norm": 0.26672542095184326, "learning_rate": 6.0656814597003175e-06, "loss": 0.47757232189178467, "step": 12757 }, { "epoch": 15.653987730061349, "grad_norm": 0.28515806794166565, "learning_rate": 6.062404416164804e-06, "loss": 0.796695351600647, "step": 12758 }, { "epoch": 15.655214723926381, "grad_norm": 0.2298574000597, "learning_rate": 6.059128135949762e-06, "loss": 0.44286221265792847, "step": 12759 }, { "epoch": 15.656441717791411, "grad_norm": 0.2652911841869354, "learning_rate": 6.055852619187241e-06, "loss": 0.5940333604812622, "step": 12760 }, { "epoch": 15.657668711656441, "grad_norm": 0.289833128452301, "learning_rate": 6.052577866009265e-06, "loss": 0.27961546182632446, "step": 12761 }, { "epoch": 15.658895705521472, "grad_norm": 0.2684957683086395, "learning_rate": 6.0493038765478405e-06, "loss": 0.4862127900123596, "step": 12762 }, { "epoch": 15.660122699386504, "grad_norm": 0.24762091040611267, "learning_rate": 6.0460306509349256e-06, "loss": 0.4005992114543915, "step": 12763 }, { "epoch": 15.661349693251534, "grad_norm": 0.2737388014793396, "learning_rate": 6.0427581893024634e-06, "loss": 0.9577149152755737, "step": 12764 }, { "epoch": 15.662576687116564, "grad_norm": 0.27030321955680847, "learning_rate": 6.039486491782342e-06, "loss": 0.5795890092849731, "step": 12765 }, { "epoch": 15.663803680981594, "grad_norm": 0.2903246581554413, "learning_rate": 6.036215558506448e-06, "loss": 0.6298291087150574, "step": 12766 }, { "epoch": 15.665030674846626, "grad_norm": 0.33900707960128784, "learning_rate": 6.032945389606615e-06, "loss": 0.5416936874389648, "step": 12767 }, { "epoch": 15.666257668711657, "grad_norm": 0.2921961545944214, "learning_rate": 6.029675985214656e-06, "loss": 0.7258239984512329, "step": 12768 }, { "epoch": 15.667484662576687, "grad_norm": 0.2571617662906647, "learning_rate": 6.026407345462362e-06, "loss": 0.562907338142395, "step": 12769 }, { "epoch": 15.668711656441717, "grad_norm": 0.2664739489555359, "learning_rate": 6.023139470481467e-06, "loss": 0.6077083945274353, "step": 12770 }, { "epoch": 15.669938650306749, "grad_norm": 0.2663196325302124, "learning_rate": 6.019872360403697e-06, "loss": 0.5248688459396362, "step": 12771 }, { "epoch": 15.67116564417178, "grad_norm": 0.28149300813674927, "learning_rate": 6.016606015360737e-06, "loss": 0.4923279881477356, "step": 12772 }, { "epoch": 15.67239263803681, "grad_norm": 0.2748773396015167, "learning_rate": 6.013340435484246e-06, "loss": 0.3890300393104553, "step": 12773 }, { "epoch": 15.67361963190184, "grad_norm": 0.258524626493454, "learning_rate": 6.010075620905858e-06, "loss": 0.47791939973831177, "step": 12774 }, { "epoch": 15.674846625766872, "grad_norm": 0.29034921526908875, "learning_rate": 6.006811571757156e-06, "loss": 0.6718595623970032, "step": 12775 }, { "epoch": 15.676073619631902, "grad_norm": 0.4051297903060913, "learning_rate": 6.003548288169708e-06, "loss": 0.4326612949371338, "step": 12776 }, { "epoch": 15.677300613496932, "grad_norm": 0.30887454748153687, "learning_rate": 6.000285770275047e-06, "loss": 0.6863148212432861, "step": 12777 }, { "epoch": 15.678527607361962, "grad_norm": 0.34483209252357483, "learning_rate": 5.9970240182046804e-06, "loss": 0.677511990070343, "step": 12778 }, { "epoch": 15.679754601226994, "grad_norm": 0.25705599784851074, "learning_rate": 5.993763032090074e-06, "loss": 0.4983939230442047, "step": 12779 }, { "epoch": 15.680981595092025, "grad_norm": 0.2536693215370178, "learning_rate": 5.990502812062676e-06, "loss": 0.4679912328720093, "step": 12780 }, { "epoch": 15.682208588957055, "grad_norm": 0.2703671157360077, "learning_rate": 5.987243358253891e-06, "loss": 0.5384006500244141, "step": 12781 }, { "epoch": 15.683435582822085, "grad_norm": 0.28375378251075745, "learning_rate": 5.983984670795101e-06, "loss": 0.631764829158783, "step": 12782 }, { "epoch": 15.684662576687117, "grad_norm": 0.27655521035194397, "learning_rate": 5.980726749817661e-06, "loss": 0.6996958255767822, "step": 12783 }, { "epoch": 15.685889570552147, "grad_norm": 0.24240727722644806, "learning_rate": 5.977469595452873e-06, "loss": 0.6931670904159546, "step": 12784 }, { "epoch": 15.687116564417177, "grad_norm": 0.30429574847221375, "learning_rate": 5.97421320783203e-06, "loss": 0.7242062091827393, "step": 12785 }, { "epoch": 15.68834355828221, "grad_norm": 0.29920464754104614, "learning_rate": 5.970957587086393e-06, "loss": 0.310752272605896, "step": 12786 }, { "epoch": 15.68957055214724, "grad_norm": 0.2528465688228607, "learning_rate": 5.96770273334718e-06, "loss": 0.3136661946773529, "step": 12787 }, { "epoch": 15.69079754601227, "grad_norm": 0.2314869612455368, "learning_rate": 5.964448646745596e-06, "loss": 0.5850871801376343, "step": 12788 }, { "epoch": 15.6920245398773, "grad_norm": 0.21641840040683746, "learning_rate": 5.961195327412792e-06, "loss": 0.5487114191055298, "step": 12789 }, { "epoch": 15.69325153374233, "grad_norm": 0.31347835063934326, "learning_rate": 5.957942775479899e-06, "loss": 0.5058709383010864, "step": 12790 }, { "epoch": 15.694478527607362, "grad_norm": 0.27960941195487976, "learning_rate": 5.954690991078027e-06, "loss": 0.7170905470848083, "step": 12791 }, { "epoch": 15.695705521472393, "grad_norm": 0.23721815645694733, "learning_rate": 5.951439974338238e-06, "loss": 0.4438845217227936, "step": 12792 }, { "epoch": 15.696932515337423, "grad_norm": 0.284920871257782, "learning_rate": 5.948189725391587e-06, "loss": 0.4460713267326355, "step": 12793 }, { "epoch": 15.698159509202455, "grad_norm": 0.2777925431728363, "learning_rate": 5.94494024436906e-06, "loss": 0.5067608952522278, "step": 12794 }, { "epoch": 15.699386503067485, "grad_norm": 0.28021085262298584, "learning_rate": 5.941691531401647e-06, "loss": 0.5634728670120239, "step": 12795 }, { "epoch": 15.700613496932515, "grad_norm": 0.24852454662322998, "learning_rate": 5.938443586620293e-06, "loss": 0.6653916239738464, "step": 12796 }, { "epoch": 15.701840490797546, "grad_norm": 0.2950904369354248, "learning_rate": 5.935196410155911e-06, "loss": 0.6361713409423828, "step": 12797 }, { "epoch": 15.703067484662578, "grad_norm": 0.26248905062675476, "learning_rate": 5.931950002139394e-06, "loss": 0.6554371118545532, "step": 12798 }, { "epoch": 15.704294478527608, "grad_norm": 0.28935515880584717, "learning_rate": 5.928704362701584e-06, "loss": 0.48411956429481506, "step": 12799 }, { "epoch": 15.705521472392638, "grad_norm": 0.2560102641582489, "learning_rate": 5.925459491973306e-06, "loss": 0.6865326166152954, "step": 12800 }, { "epoch": 15.706748466257668, "grad_norm": 0.27545857429504395, "learning_rate": 5.922215390085348e-06, "loss": 0.5033490061759949, "step": 12801 }, { "epoch": 15.7079754601227, "grad_norm": 0.25140801072120667, "learning_rate": 5.9189720571684894e-06, "loss": 0.5144493579864502, "step": 12802 }, { "epoch": 15.70920245398773, "grad_norm": 0.2426862269639969, "learning_rate": 5.915729493353439e-06, "loss": 0.6522630453109741, "step": 12803 }, { "epoch": 15.71042944785276, "grad_norm": 0.2540743350982666, "learning_rate": 5.912487698770902e-06, "loss": 0.6210570931434631, "step": 12804 }, { "epoch": 15.71165644171779, "grad_norm": 0.2994140684604645, "learning_rate": 5.909246673551549e-06, "loss": 0.43040865659713745, "step": 12805 }, { "epoch": 15.712883435582823, "grad_norm": 0.27264511585235596, "learning_rate": 5.906006417826013e-06, "loss": 0.6375023126602173, "step": 12806 }, { "epoch": 15.714110429447853, "grad_norm": 0.3044620156288147, "learning_rate": 5.902766931724907e-06, "loss": 0.7690158486366272, "step": 12807 }, { "epoch": 15.715337423312883, "grad_norm": 0.23894716799259186, "learning_rate": 5.899528215378791e-06, "loss": 0.7490386962890625, "step": 12808 }, { "epoch": 15.716564417177914, "grad_norm": 0.2794683575630188, "learning_rate": 5.89629026891822e-06, "loss": 0.7041683793067932, "step": 12809 }, { "epoch": 15.717791411042946, "grad_norm": 0.2752382457256317, "learning_rate": 5.893053092473699e-06, "loss": 0.6754395365715027, "step": 12810 }, { "epoch": 15.719018404907976, "grad_norm": 0.2652955651283264, "learning_rate": 5.889816686175714e-06, "loss": 0.5293214321136475, "step": 12811 }, { "epoch": 15.720245398773006, "grad_norm": 0.25399598479270935, "learning_rate": 5.886581050154722e-06, "loss": 0.7004097700119019, "step": 12812 }, { "epoch": 15.721472392638036, "grad_norm": 0.2772097885608673, "learning_rate": 5.883346184541128e-06, "loss": 0.6675175428390503, "step": 12813 }, { "epoch": 15.722699386503068, "grad_norm": 0.241989865899086, "learning_rate": 5.880112089465325e-06, "loss": 0.4095560908317566, "step": 12814 }, { "epoch": 15.723926380368098, "grad_norm": 0.26719287037849426, "learning_rate": 5.876878765057675e-06, "loss": 0.5716394782066345, "step": 12815 }, { "epoch": 15.725153374233129, "grad_norm": 0.23094972968101501, "learning_rate": 5.8736462114485e-06, "loss": 0.48704615235328674, "step": 12816 }, { "epoch": 15.726380368098159, "grad_norm": 0.2658165693283081, "learning_rate": 5.870414428768104e-06, "loss": 0.7346274852752686, "step": 12817 }, { "epoch": 15.72760736196319, "grad_norm": 0.3020244538784027, "learning_rate": 5.867183417146735e-06, "loss": 0.48938268423080444, "step": 12818 }, { "epoch": 15.728834355828221, "grad_norm": 0.2508537769317627, "learning_rate": 5.863953176714632e-06, "loss": 0.5794956684112549, "step": 12819 }, { "epoch": 15.730061349693251, "grad_norm": 0.2480389028787613, "learning_rate": 5.860723707602001e-06, "loss": 0.7469800710678101, "step": 12820 }, { "epoch": 15.731288343558282, "grad_norm": 0.2503752112388611, "learning_rate": 5.85749500993901e-06, "loss": 0.5267501473426819, "step": 12821 }, { "epoch": 15.732515337423314, "grad_norm": 0.27579984068870544, "learning_rate": 5.854267083855805e-06, "loss": 0.43521469831466675, "step": 12822 }, { "epoch": 15.733742331288344, "grad_norm": 0.25808852910995483, "learning_rate": 5.851039929482477e-06, "loss": 0.4058746099472046, "step": 12823 }, { "epoch": 15.734969325153374, "grad_norm": 0.2849666178226471, "learning_rate": 5.8478135469491215e-06, "loss": 0.524961531162262, "step": 12824 }, { "epoch": 15.736196319018404, "grad_norm": 0.3121947944164276, "learning_rate": 5.844587936385775e-06, "loss": 0.5814248919487, "step": 12825 }, { "epoch": 15.737423312883436, "grad_norm": 0.3389284610748291, "learning_rate": 5.8413630979224646e-06, "loss": 0.6224042177200317, "step": 12826 }, { "epoch": 15.738650306748466, "grad_norm": 0.2502458393573761, "learning_rate": 5.83813903168916e-06, "loss": 0.5485789775848389, "step": 12827 }, { "epoch": 15.739877300613497, "grad_norm": 0.27403226494789124, "learning_rate": 5.8349157378158186e-06, "loss": 0.5332878828048706, "step": 12828 }, { "epoch": 15.741104294478527, "grad_norm": 0.23121680319309235, "learning_rate": 5.831693216432363e-06, "loss": 0.47900354862213135, "step": 12829 }, { "epoch": 15.742331288343559, "grad_norm": 0.2854121923446655, "learning_rate": 5.8284714676686835e-06, "loss": 0.6155460476875305, "step": 12830 }, { "epoch": 15.743558282208589, "grad_norm": 0.2662443220615387, "learning_rate": 5.825250491654649e-06, "loss": 0.5385055541992188, "step": 12831 }, { "epoch": 15.74478527607362, "grad_norm": 0.29395022988319397, "learning_rate": 5.822030288520072e-06, "loss": 0.6505008935928345, "step": 12832 }, { "epoch": 15.74601226993865, "grad_norm": 0.239127054810524, "learning_rate": 5.818810858394755e-06, "loss": 0.6188477873802185, "step": 12833 }, { "epoch": 15.747239263803682, "grad_norm": 0.23954400420188904, "learning_rate": 5.815592201408468e-06, "loss": 0.48632967472076416, "step": 12834 }, { "epoch": 15.748466257668712, "grad_norm": 0.30493345856666565, "learning_rate": 5.812374317690941e-06, "loss": 0.5263742208480835, "step": 12835 }, { "epoch": 15.749693251533742, "grad_norm": 0.29950520396232605, "learning_rate": 5.80915720737189e-06, "loss": 0.5226575136184692, "step": 12836 }, { "epoch": 15.750920245398772, "grad_norm": 0.28848010301589966, "learning_rate": 5.80594087058097e-06, "loss": 0.7128545045852661, "step": 12837 }, { "epoch": 15.752147239263804, "grad_norm": 0.2711469829082489, "learning_rate": 5.802725307447831e-06, "loss": 0.5867794752120972, "step": 12838 }, { "epoch": 15.753374233128834, "grad_norm": 0.28288665413856506, "learning_rate": 5.7995105181020805e-06, "loss": 0.6737452745437622, "step": 12839 }, { "epoch": 15.754601226993865, "grad_norm": 0.2334677278995514, "learning_rate": 5.7962965026733015e-06, "loss": 0.456038236618042, "step": 12840 }, { "epoch": 15.755828220858895, "grad_norm": 0.23638366162776947, "learning_rate": 5.793083261291046e-06, "loss": 0.5436218976974487, "step": 12841 }, { "epoch": 15.757055214723927, "grad_norm": 0.2595236599445343, "learning_rate": 5.789870794084818e-06, "loss": 0.5722536444664001, "step": 12842 }, { "epoch": 15.758282208588957, "grad_norm": 0.26220640540122986, "learning_rate": 5.786659101184108e-06, "loss": 0.5733100175857544, "step": 12843 }, { "epoch": 15.759509202453987, "grad_norm": 0.29735633730888367, "learning_rate": 5.783448182718374e-06, "loss": 0.499106764793396, "step": 12844 }, { "epoch": 15.76073619631902, "grad_norm": 0.23294879496097565, "learning_rate": 5.780238038817035e-06, "loss": 0.4870888292789459, "step": 12845 }, { "epoch": 15.76196319018405, "grad_norm": 0.28779786825180054, "learning_rate": 5.777028669609483e-06, "loss": 0.6239197254180908, "step": 12846 }, { "epoch": 15.76319018404908, "grad_norm": 0.2906424105167389, "learning_rate": 5.773820075225081e-06, "loss": 0.49835044145584106, "step": 12847 }, { "epoch": 15.76441717791411, "grad_norm": 0.25563687086105347, "learning_rate": 5.770612255793156e-06, "loss": 0.6726406216621399, "step": 12848 }, { "epoch": 15.76564417177914, "grad_norm": 0.26024606823921204, "learning_rate": 5.767405211443008e-06, "loss": 0.4137412905693054, "step": 12849 }, { "epoch": 15.766871165644172, "grad_norm": 0.27167558670043945, "learning_rate": 5.764198942303908e-06, "loss": 0.44291532039642334, "step": 12850 }, { "epoch": 15.768098159509202, "grad_norm": 0.2610030472278595, "learning_rate": 5.760993448505081e-06, "loss": 0.6662598848342896, "step": 12851 }, { "epoch": 15.769325153374233, "grad_norm": 0.2864047884941101, "learning_rate": 5.757788730175736e-06, "loss": 0.7611885666847229, "step": 12852 }, { "epoch": 15.770552147239265, "grad_norm": 0.277018278837204, "learning_rate": 5.75458478744505e-06, "loss": 0.6362431049346924, "step": 12853 }, { "epoch": 15.771779141104295, "grad_norm": 0.28230684995651245, "learning_rate": 5.751381620442159e-06, "loss": 0.582423210144043, "step": 12854 }, { "epoch": 15.773006134969325, "grad_norm": 0.24903088808059692, "learning_rate": 5.748179229296175e-06, "loss": 0.5847643613815308, "step": 12855 }, { "epoch": 15.774233128834355, "grad_norm": 0.27796903252601624, "learning_rate": 5.744977614136188e-06, "loss": 0.7944222092628479, "step": 12856 }, { "epoch": 15.775460122699386, "grad_norm": 0.27657490968704224, "learning_rate": 5.741776775091226e-06, "loss": 0.553414523601532, "step": 12857 }, { "epoch": 15.776687116564418, "grad_norm": 0.2424800992012024, "learning_rate": 5.7385767122903195e-06, "loss": 0.43389666080474854, "step": 12858 }, { "epoch": 15.777914110429448, "grad_norm": 0.26545068621635437, "learning_rate": 5.735377425862448e-06, "loss": 0.49082672595977783, "step": 12859 }, { "epoch": 15.779141104294478, "grad_norm": 0.255075603723526, "learning_rate": 5.7321789159365675e-06, "loss": 0.6415769457817078, "step": 12860 }, { "epoch": 15.78036809815951, "grad_norm": 0.31885817646980286, "learning_rate": 5.72898118264161e-06, "loss": 0.5123682022094727, "step": 12861 }, { "epoch": 15.78159509202454, "grad_norm": 0.27318885922431946, "learning_rate": 5.725784226106451e-06, "loss": 0.432420551776886, "step": 12862 }, { "epoch": 15.78282208588957, "grad_norm": 0.2710093855857849, "learning_rate": 5.722588046459959e-06, "loss": 0.6710842251777649, "step": 12863 }, { "epoch": 15.7840490797546, "grad_norm": 0.26670539379119873, "learning_rate": 5.719392643830962e-06, "loss": 0.7456415891647339, "step": 12864 }, { "epoch": 15.785276073619633, "grad_norm": 0.2363160252571106, "learning_rate": 5.716198018348254e-06, "loss": 0.619030237197876, "step": 12865 }, { "epoch": 15.786503067484663, "grad_norm": 0.2742464542388916, "learning_rate": 5.713004170140615e-06, "loss": 0.6851727962493896, "step": 12866 }, { "epoch": 15.787730061349693, "grad_norm": 0.27439185976982117, "learning_rate": 5.709811099336756e-06, "loss": 0.5309034585952759, "step": 12867 }, { "epoch": 15.788957055214723, "grad_norm": 0.28479263186454773, "learning_rate": 5.706618806065401e-06, "loss": 0.43437275290489197, "step": 12868 }, { "epoch": 15.790184049079755, "grad_norm": 0.247706800699234, "learning_rate": 5.7034272904552124e-06, "loss": 0.6565269231796265, "step": 12869 }, { "epoch": 15.791411042944786, "grad_norm": 0.3035375773906708, "learning_rate": 5.700236552634844e-06, "loss": 0.7621135711669922, "step": 12870 }, { "epoch": 15.792638036809816, "grad_norm": 0.28326523303985596, "learning_rate": 5.697046592732888e-06, "loss": 0.5204185247421265, "step": 12871 }, { "epoch": 15.793865030674846, "grad_norm": 0.7353288531303406, "learning_rate": 5.693857410877931e-06, "loss": 0.5172260403633118, "step": 12872 }, { "epoch": 15.795092024539878, "grad_norm": 0.2693408131599426, "learning_rate": 5.690669007198518e-06, "loss": 0.6649735569953918, "step": 12873 }, { "epoch": 15.796319018404908, "grad_norm": 0.2986961901187897, "learning_rate": 5.687481381823165e-06, "loss": 0.4715694189071655, "step": 12874 }, { "epoch": 15.797546012269938, "grad_norm": 0.2663111388683319, "learning_rate": 5.6842945348803635e-06, "loss": 0.7936082482337952, "step": 12875 }, { "epoch": 15.798773006134969, "grad_norm": 0.27661412954330444, "learning_rate": 5.681108466498552e-06, "loss": 0.6003186106681824, "step": 12876 }, { "epoch": 15.8, "grad_norm": 0.2448684275150299, "learning_rate": 5.67792317680616e-06, "loss": 0.65837562084198, "step": 12877 }, { "epoch": 15.801226993865031, "grad_norm": 0.2774261236190796, "learning_rate": 5.674738665931575e-06, "loss": 0.42281243205070496, "step": 12878 }, { "epoch": 15.802453987730061, "grad_norm": 0.269991934299469, "learning_rate": 5.67155493400316e-06, "loss": 0.6394100189208984, "step": 12879 }, { "epoch": 15.803680981595091, "grad_norm": 0.2753709852695465, "learning_rate": 5.668371981149243e-06, "loss": 0.6665001511573792, "step": 12880 }, { "epoch": 15.804907975460123, "grad_norm": 0.2481091022491455, "learning_rate": 5.665189807498112e-06, "loss": 0.5381876230239868, "step": 12881 }, { "epoch": 15.806134969325154, "grad_norm": 0.7230775952339172, "learning_rate": 5.662008413178033e-06, "loss": 0.6818059682846069, "step": 12882 }, { "epoch": 15.807361963190184, "grad_norm": 0.3016887307167053, "learning_rate": 5.65882779831724e-06, "loss": 0.5134695172309875, "step": 12883 }, { "epoch": 15.808588957055214, "grad_norm": 0.2703820466995239, "learning_rate": 5.655647963043937e-06, "loss": 0.6808431148529053, "step": 12884 }, { "epoch": 15.809815950920246, "grad_norm": 0.26754581928253174, "learning_rate": 5.6524689074863e-06, "loss": 0.40377479791641235, "step": 12885 }, { "epoch": 15.811042944785276, "grad_norm": 0.30232736468315125, "learning_rate": 5.649290631772452e-06, "loss": 0.6766160726547241, "step": 12886 }, { "epoch": 15.812269938650306, "grad_norm": 0.31346002221107483, "learning_rate": 5.646113136030512e-06, "loss": 0.6831986308097839, "step": 12887 }, { "epoch": 15.813496932515337, "grad_norm": 0.2749636173248291, "learning_rate": 5.64293642038855e-06, "loss": 0.6920896768569946, "step": 12888 }, { "epoch": 15.814723926380369, "grad_norm": 0.26047149300575256, "learning_rate": 5.639760484974613e-06, "loss": 0.6372263431549072, "step": 12889 }, { "epoch": 15.815950920245399, "grad_norm": 0.25208237767219543, "learning_rate": 5.636585329916716e-06, "loss": 0.6025063991546631, "step": 12890 }, { "epoch": 15.81717791411043, "grad_norm": 0.2412385195493698, "learning_rate": 5.633410955342835e-06, "loss": 0.6544393301010132, "step": 12891 }, { "epoch": 15.81840490797546, "grad_norm": 0.27658993005752563, "learning_rate": 5.6302373613809245e-06, "loss": 0.6694931983947754, "step": 12892 }, { "epoch": 15.819631901840491, "grad_norm": 0.25565391778945923, "learning_rate": 5.627064548158903e-06, "loss": 0.4794592559337616, "step": 12893 }, { "epoch": 15.820858895705522, "grad_norm": 0.28930097818374634, "learning_rate": 5.62389251580466e-06, "loss": 0.6858644485473633, "step": 12894 }, { "epoch": 15.822085889570552, "grad_norm": 0.2489931732416153, "learning_rate": 5.620721264446044e-06, "loss": 0.45386725664138794, "step": 12895 }, { "epoch": 15.823312883435582, "grad_norm": 0.3127693831920624, "learning_rate": 5.617550794210879e-06, "loss": 0.6546691060066223, "step": 12896 }, { "epoch": 15.824539877300614, "grad_norm": 0.2780549228191376, "learning_rate": 5.614381105226962e-06, "loss": 0.6945840716362, "step": 12897 }, { "epoch": 15.825766871165644, "grad_norm": 0.23834598064422607, "learning_rate": 5.611212197622054e-06, "loss": 0.5965961217880249, "step": 12898 }, { "epoch": 15.826993865030675, "grad_norm": 0.25416532158851624, "learning_rate": 5.6080440715238905e-06, "loss": 0.5515996217727661, "step": 12899 }, { "epoch": 15.828220858895705, "grad_norm": 0.24012033641338348, "learning_rate": 5.604876727060157e-06, "loss": 0.4256647229194641, "step": 12900 }, { "epoch": 15.829447852760737, "grad_norm": 0.2982960641384125, "learning_rate": 5.6017101643585254e-06, "loss": 0.5428507328033447, "step": 12901 }, { "epoch": 15.830674846625767, "grad_norm": 0.24456173181533813, "learning_rate": 5.598544383546631e-06, "loss": 0.5361067652702332, "step": 12902 }, { "epoch": 15.831901840490797, "grad_norm": 0.2691926062107086, "learning_rate": 5.595379384752078e-06, "loss": 0.5855640172958374, "step": 12903 }, { "epoch": 15.833128834355827, "grad_norm": 0.31130874156951904, "learning_rate": 5.592215168102444e-06, "loss": 0.45494574308395386, "step": 12904 }, { "epoch": 15.83435582822086, "grad_norm": 0.26319852471351624, "learning_rate": 5.589051733725256e-06, "loss": 0.6769400835037231, "step": 12905 }, { "epoch": 15.83558282208589, "grad_norm": 0.2705625593662262, "learning_rate": 5.585889081748033e-06, "loss": 0.6100140810012817, "step": 12906 }, { "epoch": 15.83680981595092, "grad_norm": 0.27216488122940063, "learning_rate": 5.58272721229825e-06, "loss": 0.594793438911438, "step": 12907 }, { "epoch": 15.83803680981595, "grad_norm": 0.25041964650154114, "learning_rate": 5.579566125503355e-06, "loss": 0.7284558415412903, "step": 12908 }, { "epoch": 15.839263803680982, "grad_norm": 0.2735007703304291, "learning_rate": 5.576405821490765e-06, "loss": 0.5259683132171631, "step": 12909 }, { "epoch": 15.840490797546012, "grad_norm": 0.26938924193382263, "learning_rate": 5.573246300387852e-06, "loss": 0.7844629287719727, "step": 12910 }, { "epoch": 15.841717791411043, "grad_norm": 0.24573613703250885, "learning_rate": 5.570087562321974e-06, "loss": 0.6811654567718506, "step": 12911 }, { "epoch": 15.842944785276075, "grad_norm": 0.25161391496658325, "learning_rate": 5.5669296074204455e-06, "loss": 0.6393042206764221, "step": 12912 }, { "epoch": 15.844171779141105, "grad_norm": 0.32519423961639404, "learning_rate": 5.563772435810572e-06, "loss": 0.5718123316764832, "step": 12913 }, { "epoch": 15.845398773006135, "grad_norm": 0.3021550178527832, "learning_rate": 5.56061604761959e-06, "loss": 0.7807407379150391, "step": 12914 }, { "epoch": 15.846625766871165, "grad_norm": 0.2803446054458618, "learning_rate": 5.5574604429747325e-06, "loss": 0.6490997076034546, "step": 12915 }, { "epoch": 15.847852760736195, "grad_norm": 0.2597302496433258, "learning_rate": 5.554305622003195e-06, "loss": 0.6966940760612488, "step": 12916 }, { "epoch": 15.849079754601227, "grad_norm": 0.26515382528305054, "learning_rate": 5.5511515848321345e-06, "loss": 0.48842790722846985, "step": 12917 }, { "epoch": 15.850306748466258, "grad_norm": 0.26353809237480164, "learning_rate": 5.547998331588691e-06, "loss": 0.47529906034469604, "step": 12918 }, { "epoch": 15.851533742331288, "grad_norm": 0.30828943848609924, "learning_rate": 5.54484586239995e-06, "loss": 0.45221492648124695, "step": 12919 }, { "epoch": 15.85276073619632, "grad_norm": 0.24608832597732544, "learning_rate": 5.541694177392984e-06, "loss": 0.537915825843811, "step": 12920 }, { "epoch": 15.85398773006135, "grad_norm": 0.26252737641334534, "learning_rate": 5.538543276694827e-06, "loss": 0.641825795173645, "step": 12921 }, { "epoch": 15.85521472392638, "grad_norm": 0.2736719250679016, "learning_rate": 5.535393160432487e-06, "loss": 0.5964030027389526, "step": 12922 }, { "epoch": 15.85644171779141, "grad_norm": 0.2808792293071747, "learning_rate": 5.532243828732941e-06, "loss": 0.6969484090805054, "step": 12923 }, { "epoch": 15.857668711656443, "grad_norm": 0.2595333158969879, "learning_rate": 5.529095281723115e-06, "loss": 0.5340757369995117, "step": 12924 }, { "epoch": 15.858895705521473, "grad_norm": 0.2760104537010193, "learning_rate": 5.525947519529925e-06, "loss": 0.7753558158874512, "step": 12925 }, { "epoch": 15.860122699386503, "grad_norm": 0.2581788897514343, "learning_rate": 5.522800542280249e-06, "loss": 0.4278774559497833, "step": 12926 }, { "epoch": 15.861349693251533, "grad_norm": 0.24116060137748718, "learning_rate": 5.519654350100934e-06, "loss": 0.6529282331466675, "step": 12927 }, { "epoch": 15.862576687116565, "grad_norm": 0.2699322998523712, "learning_rate": 5.516508943118795e-06, "loss": 0.701669454574585, "step": 12928 }, { "epoch": 15.863803680981595, "grad_norm": 0.279906302690506, "learning_rate": 5.51336432146061e-06, "loss": 0.3833267092704773, "step": 12929 }, { "epoch": 15.865030674846626, "grad_norm": 0.2746529281139374, "learning_rate": 5.510220485253129e-06, "loss": 0.6440955996513367, "step": 12930 }, { "epoch": 15.866257668711656, "grad_norm": 0.28097182512283325, "learning_rate": 5.507077434623073e-06, "loss": 0.6191322207450867, "step": 12931 }, { "epoch": 15.867484662576688, "grad_norm": 0.30720922350883484, "learning_rate": 5.50393516969713e-06, "loss": 0.5572918653488159, "step": 12932 }, { "epoch": 15.868711656441718, "grad_norm": 0.2874387800693512, "learning_rate": 5.500793690601966e-06, "loss": 0.42533227801322937, "step": 12933 }, { "epoch": 15.869938650306748, "grad_norm": 0.2613588273525238, "learning_rate": 5.497652997464179e-06, "loss": 0.24297590553760529, "step": 12934 }, { "epoch": 15.871165644171779, "grad_norm": 0.21092593669891357, "learning_rate": 5.494513090410383e-06, "loss": 0.44711747765541077, "step": 12935 }, { "epoch": 15.87239263803681, "grad_norm": 0.24762189388275146, "learning_rate": 5.491373969567132e-06, "loss": 0.5153093338012695, "step": 12936 }, { "epoch": 15.87361963190184, "grad_norm": 0.2763599753379822, "learning_rate": 5.488235635060965e-06, "loss": 0.6686704158782959, "step": 12937 }, { "epoch": 15.874846625766871, "grad_norm": 0.23987600207328796, "learning_rate": 5.485098087018364e-06, "loss": 0.6766093373298645, "step": 12938 }, { "epoch": 15.876073619631901, "grad_norm": 0.26436665654182434, "learning_rate": 5.481961325565799e-06, "loss": 0.6964671611785889, "step": 12939 }, { "epoch": 15.877300613496933, "grad_norm": 0.2936016619205475, "learning_rate": 5.478825350829708e-06, "loss": 0.5663042068481445, "step": 12940 }, { "epoch": 15.878527607361963, "grad_norm": 0.3022820055484772, "learning_rate": 5.475690162936489e-06, "loss": 0.4948866367340088, "step": 12941 }, { "epoch": 15.879754601226994, "grad_norm": 0.20195774734020233, "learning_rate": 5.472555762012521e-06, "loss": 0.3703067898750305, "step": 12942 }, { "epoch": 15.880981595092024, "grad_norm": 0.24729138612747192, "learning_rate": 5.4694221481841305e-06, "loss": 0.7086818218231201, "step": 12943 }, { "epoch": 15.882208588957056, "grad_norm": 0.23441238701343536, "learning_rate": 5.466289321577631e-06, "loss": 0.42275571823120117, "step": 12944 }, { "epoch": 15.883435582822086, "grad_norm": 0.2737146019935608, "learning_rate": 5.463157282319295e-06, "loss": 0.5904544591903687, "step": 12945 }, { "epoch": 15.884662576687116, "grad_norm": 0.26763930916786194, "learning_rate": 5.46002603053537e-06, "loss": 0.5083866119384766, "step": 12946 }, { "epoch": 15.885889570552147, "grad_norm": 0.279946506023407, "learning_rate": 5.456895566352071e-06, "loss": 0.6991064548492432, "step": 12947 }, { "epoch": 15.887116564417179, "grad_norm": 0.22866520285606384, "learning_rate": 5.4537658898955675e-06, "loss": 0.432364821434021, "step": 12948 }, { "epoch": 15.888343558282209, "grad_norm": 0.2834630310535431, "learning_rate": 5.450637001292014e-06, "loss": 0.40877413749694824, "step": 12949 }, { "epoch": 15.889570552147239, "grad_norm": 0.25785529613494873, "learning_rate": 5.447508900667525e-06, "loss": 0.566697359085083, "step": 12950 }, { "epoch": 15.89079754601227, "grad_norm": 0.26123881340026855, "learning_rate": 5.44438158814819e-06, "loss": 0.6832880973815918, "step": 12951 }, { "epoch": 15.892024539877301, "grad_norm": 0.25981393456459045, "learning_rate": 5.441255063860062e-06, "loss": 0.3886697590351105, "step": 12952 }, { "epoch": 15.893251533742331, "grad_norm": 0.26138660311698914, "learning_rate": 5.4381293279291535e-06, "loss": 0.8755785822868347, "step": 12953 }, { "epoch": 15.894478527607362, "grad_norm": 0.27467644214630127, "learning_rate": 5.435004380481459e-06, "loss": 0.5946595072746277, "step": 12954 }, { "epoch": 15.895705521472392, "grad_norm": 0.257678359746933, "learning_rate": 5.4318802216429376e-06, "loss": 0.5791707038879395, "step": 12955 }, { "epoch": 15.896932515337424, "grad_norm": 0.2685476839542389, "learning_rate": 5.4287568515395124e-06, "loss": 0.6961545944213867, "step": 12956 }, { "epoch": 15.898159509202454, "grad_norm": 0.2715289294719696, "learning_rate": 5.4256342702970835e-06, "loss": 0.5847371816635132, "step": 12957 }, { "epoch": 15.899386503067484, "grad_norm": 0.2977306544780731, "learning_rate": 5.422512478041505e-06, "loss": 0.6162201166152954, "step": 12958 }, { "epoch": 15.900613496932515, "grad_norm": 0.28120383620262146, "learning_rate": 5.419391474898613e-06, "loss": 0.7699145078659058, "step": 12959 }, { "epoch": 15.901840490797547, "grad_norm": 0.25548258423805237, "learning_rate": 5.416271260994207e-06, "loss": 0.5186704397201538, "step": 12960 }, { "epoch": 15.903067484662577, "grad_norm": 0.26576468348503113, "learning_rate": 5.41315183645405e-06, "loss": 0.6112176179885864, "step": 12961 }, { "epoch": 15.904294478527607, "grad_norm": 0.30121636390686035, "learning_rate": 5.4100332014038885e-06, "loss": 0.5468166470527649, "step": 12962 }, { "epoch": 15.905521472392637, "grad_norm": 0.28979238867759705, "learning_rate": 5.406915355969408e-06, "loss": 0.53612220287323, "step": 12963 }, { "epoch": 15.90674846625767, "grad_norm": 0.26392459869384766, "learning_rate": 5.403798300276286e-06, "loss": 0.6345849633216858, "step": 12964 }, { "epoch": 15.9079754601227, "grad_norm": 0.26937127113342285, "learning_rate": 5.400682034450166e-06, "loss": 0.38669553399086, "step": 12965 }, { "epoch": 15.90920245398773, "grad_norm": 0.2515028119087219, "learning_rate": 5.397566558616657e-06, "loss": 0.5986008644104004, "step": 12966 }, { "epoch": 15.91042944785276, "grad_norm": 0.2836253345012665, "learning_rate": 5.394451872901335e-06, "loss": 0.42470961809158325, "step": 12967 }, { "epoch": 15.911656441717792, "grad_norm": 0.24483513832092285, "learning_rate": 5.391337977429737e-06, "loss": 0.4688147008419037, "step": 12968 }, { "epoch": 15.912883435582822, "grad_norm": 0.24944595992565155, "learning_rate": 5.388224872327377e-06, "loss": 0.5889392495155334, "step": 12969 }, { "epoch": 15.914110429447852, "grad_norm": 0.2956094443798065, "learning_rate": 5.38511255771974e-06, "loss": 0.5737911462783813, "step": 12970 }, { "epoch": 15.915337423312884, "grad_norm": 0.33050185441970825, "learning_rate": 5.38200103373227e-06, "loss": 0.49336451292037964, "step": 12971 }, { "epoch": 15.916564417177915, "grad_norm": 0.23612597584724426, "learning_rate": 5.378890300490394e-06, "loss": 0.578619658946991, "step": 12972 }, { "epoch": 15.917791411042945, "grad_norm": 0.3348267078399658, "learning_rate": 5.375780358119484e-06, "loss": 0.6331852078437805, "step": 12973 }, { "epoch": 15.919018404907975, "grad_norm": 0.2763720452785492, "learning_rate": 5.3726712067448945e-06, "loss": 0.6768677234649658, "step": 12974 }, { "epoch": 15.920245398773005, "grad_norm": 0.24911446869373322, "learning_rate": 5.369562846491952e-06, "loss": 0.5928248763084412, "step": 12975 }, { "epoch": 15.921472392638037, "grad_norm": 0.24680325388908386, "learning_rate": 5.366455277485943e-06, "loss": 0.4642452597618103, "step": 12976 }, { "epoch": 15.922699386503067, "grad_norm": 0.27855634689331055, "learning_rate": 5.363348499852134e-06, "loss": 0.7171740531921387, "step": 12977 }, { "epoch": 15.923926380368098, "grad_norm": 0.2913602888584137, "learning_rate": 5.36024251371573e-06, "loss": 0.6166113615036011, "step": 12978 }, { "epoch": 15.92515337423313, "grad_norm": 0.2689216732978821, "learning_rate": 5.357137319201932e-06, "loss": 0.612385094165802, "step": 12979 }, { "epoch": 15.92638036809816, "grad_norm": 0.2735251784324646, "learning_rate": 5.354032916435911e-06, "loss": 0.4619371294975281, "step": 12980 }, { "epoch": 15.92760736196319, "grad_norm": 0.25716549158096313, "learning_rate": 5.350929305542801e-06, "loss": 0.592461109161377, "step": 12981 }, { "epoch": 15.92883435582822, "grad_norm": 0.23350276052951813, "learning_rate": 5.347826486647681e-06, "loss": 0.5712140202522278, "step": 12982 }, { "epoch": 15.93006134969325, "grad_norm": 0.25282958149909973, "learning_rate": 5.344724459875624e-06, "loss": 0.6747270226478577, "step": 12983 }, { "epoch": 15.931288343558283, "grad_norm": 0.27017316222190857, "learning_rate": 5.341623225351667e-06, "loss": 0.6322984099388123, "step": 12984 }, { "epoch": 15.932515337423313, "grad_norm": 0.24645322561264038, "learning_rate": 5.338522783200811e-06, "loss": 0.44512587785720825, "step": 12985 }, { "epoch": 15.933742331288343, "grad_norm": 0.2774588465690613, "learning_rate": 5.335423133548028e-06, "loss": 0.6853476762771606, "step": 12986 }, { "epoch": 15.934969325153375, "grad_norm": 0.30987048149108887, "learning_rate": 5.332324276518252e-06, "loss": 0.7592455744743347, "step": 12987 }, { "epoch": 15.936196319018405, "grad_norm": 0.2765893340110779, "learning_rate": 5.329226212236388e-06, "loss": 0.5396184921264648, "step": 12988 }, { "epoch": 15.937423312883435, "grad_norm": 0.23476813733577728, "learning_rate": 5.326128940827313e-06, "loss": 0.6114888191223145, "step": 12989 }, { "epoch": 15.938650306748466, "grad_norm": 0.2637365162372589, "learning_rate": 5.323032462415869e-06, "loss": 0.5568394064903259, "step": 12990 }, { "epoch": 15.939877300613498, "grad_norm": 0.23456163704395294, "learning_rate": 5.3199367771268715e-06, "loss": 0.503196120262146, "step": 12991 }, { "epoch": 15.941104294478528, "grad_norm": 0.3166610300540924, "learning_rate": 5.316841885085086e-06, "loss": 0.3808678388595581, "step": 12992 }, { "epoch": 15.942331288343558, "grad_norm": 0.25774604082107544, "learning_rate": 5.313747786415269e-06, "loss": 0.45783358812332153, "step": 12993 }, { "epoch": 15.943558282208588, "grad_norm": 0.24264578521251678, "learning_rate": 5.3106544812421305e-06, "loss": 0.5716643333435059, "step": 12994 }, { "epoch": 15.94478527607362, "grad_norm": 0.2778535783290863, "learning_rate": 5.307561969690352e-06, "loss": 0.6967500448226929, "step": 12995 }, { "epoch": 15.94601226993865, "grad_norm": 0.25895318388938904, "learning_rate": 5.304470251884594e-06, "loss": 0.6075814962387085, "step": 12996 }, { "epoch": 15.94723926380368, "grad_norm": 0.23280566930770874, "learning_rate": 5.30137932794946e-06, "loss": 0.1948186457157135, "step": 12997 }, { "epoch": 15.948466257668711, "grad_norm": 0.26404932141304016, "learning_rate": 5.298289198009546e-06, "loss": 0.6227001547813416, "step": 12998 }, { "epoch": 15.949693251533743, "grad_norm": 0.3145429491996765, "learning_rate": 5.2951998621893985e-06, "loss": 0.6014790534973145, "step": 12999 }, { "epoch": 15.950920245398773, "grad_norm": 0.25054362416267395, "learning_rate": 5.292111320613547e-06, "loss": 0.3537519872188568, "step": 13000 }, { "epoch": 15.952147239263804, "grad_norm": 0.2697552442550659, "learning_rate": 5.289023573406479e-06, "loss": 0.6548481583595276, "step": 13001 }, { "epoch": 15.953374233128834, "grad_norm": 0.2218792736530304, "learning_rate": 5.285936620692655e-06, "loss": 0.5027037858963013, "step": 13002 }, { "epoch": 15.954601226993866, "grad_norm": 0.2839818298816681, "learning_rate": 5.2828504625964975e-06, "loss": 0.6143465042114258, "step": 13003 }, { "epoch": 15.955828220858896, "grad_norm": 0.22097378969192505, "learning_rate": 5.2797650992424034e-06, "loss": 0.47520530223846436, "step": 13004 }, { "epoch": 15.957055214723926, "grad_norm": 0.2897052764892578, "learning_rate": 5.27668053075474e-06, "loss": 0.6560927629470825, "step": 13005 }, { "epoch": 15.958282208588956, "grad_norm": 0.2788500487804413, "learning_rate": 5.273596757257829e-06, "loss": 0.6123592257499695, "step": 13006 }, { "epoch": 15.959509202453988, "grad_norm": 0.27733463048934937, "learning_rate": 5.270513778875968e-06, "loss": 0.766680896282196, "step": 13007 }, { "epoch": 15.960736196319019, "grad_norm": 0.28626781702041626, "learning_rate": 5.267431595733427e-06, "loss": 0.7183566093444824, "step": 13008 }, { "epoch": 15.961963190184049, "grad_norm": 0.2387915402650833, "learning_rate": 5.264350207954438e-06, "loss": 0.5352331399917603, "step": 13009 }, { "epoch": 15.963190184049079, "grad_norm": 0.26076385378837585, "learning_rate": 5.261269615663211e-06, "loss": 0.5163255333900452, "step": 13010 }, { "epoch": 15.964417177914111, "grad_norm": 0.2284172922372818, "learning_rate": 5.258189818983905e-06, "loss": 0.4682171642780304, "step": 13011 }, { "epoch": 15.965644171779141, "grad_norm": 0.25805285573005676, "learning_rate": 5.255110818040659e-06, "loss": 0.5922249555587769, "step": 13012 }, { "epoch": 15.966871165644172, "grad_norm": 0.2665606737136841, "learning_rate": 5.2520326129575835e-06, "loss": 0.3919435143470764, "step": 13013 }, { "epoch": 15.968098159509202, "grad_norm": 0.28061148524284363, "learning_rate": 5.248955203858752e-06, "loss": 0.6460117101669312, "step": 13014 }, { "epoch": 15.969325153374234, "grad_norm": 0.2666756510734558, "learning_rate": 5.2458785908682084e-06, "loss": 0.7933895587921143, "step": 13015 }, { "epoch": 15.970552147239264, "grad_norm": 0.27826055884361267, "learning_rate": 5.2428027741099535e-06, "loss": 0.4665657877922058, "step": 13016 }, { "epoch": 15.971779141104294, "grad_norm": 0.2565501630306244, "learning_rate": 5.239727753707968e-06, "loss": 0.44159913063049316, "step": 13017 }, { "epoch": 15.973006134969324, "grad_norm": 0.24144409596920013, "learning_rate": 5.236653529786198e-06, "loss": 0.44537103176116943, "step": 13018 }, { "epoch": 15.974233128834356, "grad_norm": 0.29039186239242554, "learning_rate": 5.233580102468558e-06, "loss": 0.6212446689605713, "step": 13019 }, { "epoch": 15.975460122699387, "grad_norm": 0.2830570340156555, "learning_rate": 5.230507471878937e-06, "loss": 0.5306929349899292, "step": 13020 }, { "epoch": 15.976687116564417, "grad_norm": 0.20203877985477448, "learning_rate": 5.227435638141167e-06, "loss": 0.22172686457633972, "step": 13021 }, { "epoch": 15.977914110429447, "grad_norm": 0.2759567201137543, "learning_rate": 5.224364601379075e-06, "loss": 0.37772929668426514, "step": 13022 }, { "epoch": 15.979141104294479, "grad_norm": 0.2319529950618744, "learning_rate": 5.221294361716436e-06, "loss": 0.5597262382507324, "step": 13023 }, { "epoch": 15.98036809815951, "grad_norm": 0.31216850876808167, "learning_rate": 5.2182249192770235e-06, "loss": 0.5580074787139893, "step": 13024 }, { "epoch": 15.98159509202454, "grad_norm": 0.26398494839668274, "learning_rate": 5.215156274184538e-06, "loss": 0.6529409885406494, "step": 13025 }, { "epoch": 15.98282208588957, "grad_norm": 0.3300932049751282, "learning_rate": 5.212088426562678e-06, "loss": 0.5013788342475891, "step": 13026 }, { "epoch": 15.984049079754602, "grad_norm": 0.2492634654045105, "learning_rate": 5.2090213765350945e-06, "loss": 0.6816970109939575, "step": 13027 }, { "epoch": 15.985276073619632, "grad_norm": 0.28752097487449646, "learning_rate": 5.205955124225414e-06, "loss": 0.5649071931838989, "step": 13028 }, { "epoch": 15.986503067484662, "grad_norm": 0.2648501992225647, "learning_rate": 5.2028896697572325e-06, "loss": 0.5098661780357361, "step": 13029 }, { "epoch": 15.987730061349692, "grad_norm": 0.2570262849330902, "learning_rate": 5.199825013254103e-06, "loss": 0.6296374797821045, "step": 13030 }, { "epoch": 15.988957055214724, "grad_norm": 0.2843276560306549, "learning_rate": 5.196761154839552e-06, "loss": 0.6650700569152832, "step": 13031 }, { "epoch": 15.990184049079755, "grad_norm": 0.25511783361434937, "learning_rate": 5.193698094637078e-06, "loss": 0.652729868888855, "step": 13032 }, { "epoch": 15.991411042944785, "grad_norm": 0.27185848355293274, "learning_rate": 5.190635832770146e-06, "loss": 0.4431467056274414, "step": 13033 }, { "epoch": 15.992638036809815, "grad_norm": 0.25814419984817505, "learning_rate": 5.1875743693621944e-06, "loss": 0.5425264835357666, "step": 13034 }, { "epoch": 15.993865030674847, "grad_norm": 0.25825998187065125, "learning_rate": 5.184513704536603e-06, "loss": 0.5307396054267883, "step": 13035 }, { "epoch": 15.995092024539877, "grad_norm": 0.251099556684494, "learning_rate": 5.18145383841675e-06, "loss": 0.5010699033737183, "step": 13036 }, { "epoch": 15.996319018404908, "grad_norm": 0.24851934611797333, "learning_rate": 5.178394771125969e-06, "loss": 0.560047447681427, "step": 13037 }, { "epoch": 15.99754601226994, "grad_norm": 0.3421897888183594, "learning_rate": 5.175336502787559e-06, "loss": 0.8262635469436646, "step": 13038 }, { "epoch": 15.99877300613497, "grad_norm": 0.2576693296432495, "learning_rate": 5.172279033524801e-06, "loss": 0.6789044141769409, "step": 13039 }, { "epoch": 16.0, "grad_norm": 0.258897989988327, "learning_rate": 5.169222363460921e-06, "loss": 0.8071471452713013, "step": 13040 }, { "epoch": 16.001226993865032, "grad_norm": 0.2987477779388428, "learning_rate": 5.166166492719124e-06, "loss": 0.5348886251449585, "step": 13041 }, { "epoch": 16.00245398773006, "grad_norm": 0.26694822311401367, "learning_rate": 5.163111421422589e-06, "loss": 0.7468127608299255, "step": 13042 }, { "epoch": 16.003680981595092, "grad_norm": 0.33100375533103943, "learning_rate": 5.160057149694458e-06, "loss": 0.7634439468383789, "step": 13043 }, { "epoch": 16.004907975460124, "grad_norm": 0.20626524090766907, "learning_rate": 5.157003677657843e-06, "loss": 0.42979174852371216, "step": 13044 }, { "epoch": 16.006134969325153, "grad_norm": 0.2782593369483948, "learning_rate": 5.153951005435803e-06, "loss": 0.7409109473228455, "step": 13045 }, { "epoch": 16.007361963190185, "grad_norm": 0.26117467880249023, "learning_rate": 5.1508991331514014e-06, "loss": 0.7233016490936279, "step": 13046 }, { "epoch": 16.008588957055213, "grad_norm": 0.2547008991241455, "learning_rate": 5.147848060927645e-06, "loss": 0.698452889919281, "step": 13047 }, { "epoch": 16.009815950920245, "grad_norm": 0.3035458028316498, "learning_rate": 5.1447977888875206e-06, "loss": 0.439968466758728, "step": 13048 }, { "epoch": 16.011042944785277, "grad_norm": 0.27553021907806396, "learning_rate": 5.14174831715396e-06, "loss": 0.5260226130485535, "step": 13049 }, { "epoch": 16.012269938650306, "grad_norm": 0.26874637603759766, "learning_rate": 5.13869964584989e-06, "loss": 0.8002504110336304, "step": 13050 }, { "epoch": 16.013496932515338, "grad_norm": 0.2452014982700348, "learning_rate": 5.135651775098188e-06, "loss": 0.5749192237854004, "step": 13051 }, { "epoch": 16.01472392638037, "grad_norm": 0.2212306410074234, "learning_rate": 5.13260470502171e-06, "loss": 0.5469875931739807, "step": 13052 }, { "epoch": 16.0159509202454, "grad_norm": 0.29459458589553833, "learning_rate": 5.129558435743278e-06, "loss": 0.3603487014770508, "step": 13053 }, { "epoch": 16.01717791411043, "grad_norm": 0.26847225427627563, "learning_rate": 5.126512967385666e-06, "loss": 0.7329192161560059, "step": 13054 }, { "epoch": 16.01840490797546, "grad_norm": 0.27476829290390015, "learning_rate": 5.123468300071638e-06, "loss": 0.650699257850647, "step": 13055 }, { "epoch": 16.01963190184049, "grad_norm": 0.25696492195129395, "learning_rate": 5.120424433923912e-06, "loss": 0.43332237005233765, "step": 13056 }, { "epoch": 16.020858895705523, "grad_norm": 0.26430490612983704, "learning_rate": 5.1173813690651775e-06, "loss": 0.706270694732666, "step": 13057 }, { "epoch": 16.02208588957055, "grad_norm": 0.253445029258728, "learning_rate": 5.114339105618094e-06, "loss": 0.633683443069458, "step": 13058 }, { "epoch": 16.023312883435583, "grad_norm": 0.2655712366104126, "learning_rate": 5.111297643705293e-06, "loss": 0.6306790113449097, "step": 13059 }, { "epoch": 16.024539877300615, "grad_norm": 0.2499399334192276, "learning_rate": 5.1082569834493495e-06, "loss": 0.70927494764328, "step": 13060 }, { "epoch": 16.025766871165644, "grad_norm": 0.2548978328704834, "learning_rate": 5.105217124972839e-06, "loss": 0.7740485668182373, "step": 13061 }, { "epoch": 16.026993865030676, "grad_norm": 0.25912997126579285, "learning_rate": 5.1021780683982805e-06, "loss": 0.5249937772750854, "step": 13062 }, { "epoch": 16.028220858895704, "grad_norm": 0.23662908375263214, "learning_rate": 5.099139813848175e-06, "loss": 0.430331826210022, "step": 13063 }, { "epoch": 16.029447852760736, "grad_norm": 0.2601916193962097, "learning_rate": 5.096102361444994e-06, "loss": 0.7080471515655518, "step": 13064 }, { "epoch": 16.030674846625768, "grad_norm": 0.23901738226413727, "learning_rate": 5.0930657113111485e-06, "loss": 0.2508664131164551, "step": 13065 }, { "epoch": 16.031901840490796, "grad_norm": 0.24320825934410095, "learning_rate": 5.090029863569049e-06, "loss": 0.5415293574333191, "step": 13066 }, { "epoch": 16.03312883435583, "grad_norm": 0.2811431884765625, "learning_rate": 5.086994818341054e-06, "loss": 0.6707863807678223, "step": 13067 }, { "epoch": 16.03435582822086, "grad_norm": 0.24764291942119598, "learning_rate": 5.083960575749516e-06, "loss": 0.6728987693786621, "step": 13068 }, { "epoch": 16.03558282208589, "grad_norm": 0.32403939962387085, "learning_rate": 5.0809271359167215e-06, "loss": 0.5539826154708862, "step": 13069 }, { "epoch": 16.03680981595092, "grad_norm": 0.2726162374019623, "learning_rate": 5.077894498964941e-06, "loss": 0.7567647695541382, "step": 13070 }, { "epoch": 16.03803680981595, "grad_norm": 0.2610081136226654, "learning_rate": 5.074862665016414e-06, "loss": 0.5253472328186035, "step": 13071 }, { "epoch": 16.03926380368098, "grad_norm": 0.26352572441101074, "learning_rate": 5.071831634193347e-06, "loss": 0.607001006603241, "step": 13072 }, { "epoch": 16.040490797546013, "grad_norm": 0.26861435174942017, "learning_rate": 5.068801406617912e-06, "loss": 0.6935243606567383, "step": 13073 }, { "epoch": 16.041717791411042, "grad_norm": 0.2669695019721985, "learning_rate": 5.0657719824122425e-06, "loss": 0.5497543215751648, "step": 13074 }, { "epoch": 16.042944785276074, "grad_norm": 0.23763298988342285, "learning_rate": 5.062743361698452e-06, "loss": 0.5563298463821411, "step": 13075 }, { "epoch": 16.044171779141106, "grad_norm": 0.2647224962711334, "learning_rate": 5.059715544598611e-06, "loss": 0.6128878593444824, "step": 13076 }, { "epoch": 16.045398773006134, "grad_norm": 0.2711622714996338, "learning_rate": 5.0566885312347656e-06, "loss": 0.6690924167633057, "step": 13077 }, { "epoch": 16.046625766871166, "grad_norm": 0.28719109296798706, "learning_rate": 5.053662321728933e-06, "loss": 0.6004533171653748, "step": 13078 }, { "epoch": 16.047852760736195, "grad_norm": 0.23916752636432648, "learning_rate": 5.050636916203078e-06, "loss": 0.6304936408996582, "step": 13079 }, { "epoch": 16.049079754601227, "grad_norm": 0.26611536741256714, "learning_rate": 5.0476123147791485e-06, "loss": 0.5842863917350769, "step": 13080 }, { "epoch": 16.05030674846626, "grad_norm": 0.2613530457019806, "learning_rate": 5.044588517579063e-06, "loss": 0.5343704223632812, "step": 13081 }, { "epoch": 16.051533742331287, "grad_norm": 0.2872179448604584, "learning_rate": 5.041565524724698e-06, "loss": 0.6482954025268555, "step": 13082 }, { "epoch": 16.05276073619632, "grad_norm": 0.20191827416419983, "learning_rate": 5.038543336337912e-06, "loss": 0.3866099417209625, "step": 13083 }, { "epoch": 16.05398773006135, "grad_norm": 0.2698875665664673, "learning_rate": 5.035521952540503e-06, "loss": 0.6135208010673523, "step": 13084 }, { "epoch": 16.05521472392638, "grad_norm": 0.2587755620479584, "learning_rate": 5.032501373454265e-06, "loss": 0.5665088295936584, "step": 13085 }, { "epoch": 16.05644171779141, "grad_norm": 0.2795843183994293, "learning_rate": 5.029481599200947e-06, "loss": 0.7162365913391113, "step": 13086 }, { "epoch": 16.05766871165644, "grad_norm": 0.29291632771492004, "learning_rate": 5.026462629902268e-06, "loss": 0.4225742816925049, "step": 13087 }, { "epoch": 16.058895705521472, "grad_norm": 0.2533573806285858, "learning_rate": 5.023444465679922e-06, "loss": 0.6579992771148682, "step": 13088 }, { "epoch": 16.060122699386504, "grad_norm": 0.2851404547691345, "learning_rate": 5.020427106655548e-06, "loss": 0.7334887981414795, "step": 13089 }, { "epoch": 16.061349693251532, "grad_norm": 0.23880161345005035, "learning_rate": 5.017410552950766e-06, "loss": 0.436615526676178, "step": 13090 }, { "epoch": 16.062576687116565, "grad_norm": 0.26889851689338684, "learning_rate": 5.014394804687178e-06, "loss": 0.6366506218910217, "step": 13091 }, { "epoch": 16.063803680981597, "grad_norm": 0.29842597246170044, "learning_rate": 5.011379861986346e-06, "loss": 0.6024984121322632, "step": 13092 }, { "epoch": 16.065030674846625, "grad_norm": 0.2426719069480896, "learning_rate": 5.008365724969771e-06, "loss": 0.5836608409881592, "step": 13093 }, { "epoch": 16.066257668711657, "grad_norm": 0.2708030343055725, "learning_rate": 5.0053523937589584e-06, "loss": 0.7521858811378479, "step": 13094 }, { "epoch": 16.067484662576685, "grad_norm": 0.2415982484817505, "learning_rate": 5.002339868475361e-06, "loss": 0.5202589631080627, "step": 13095 }, { "epoch": 16.068711656441717, "grad_norm": 0.25995808839797974, "learning_rate": 4.999328149240412e-06, "loss": 0.8550440073013306, "step": 13096 }, { "epoch": 16.06993865030675, "grad_norm": 0.2817157208919525, "learning_rate": 4.996317236175507e-06, "loss": 0.5464953184127808, "step": 13097 }, { "epoch": 16.071165644171778, "grad_norm": 0.22676052153110504, "learning_rate": 4.993307129401994e-06, "loss": 0.48188865184783936, "step": 13098 }, { "epoch": 16.07239263803681, "grad_norm": 0.30693182349205017, "learning_rate": 4.990297829041213e-06, "loss": 0.5684122443199158, "step": 13099 }, { "epoch": 16.073619631901842, "grad_norm": 0.2378954142332077, "learning_rate": 4.987289335214454e-06, "loss": 0.3941003680229187, "step": 13100 }, { "epoch": 16.07484662576687, "grad_norm": 0.2759247422218323, "learning_rate": 4.984281648042985e-06, "loss": 0.6150417327880859, "step": 13101 }, { "epoch": 16.076073619631902, "grad_norm": 0.23794446885585785, "learning_rate": 4.981274767648042e-06, "loss": 0.6195883750915527, "step": 13102 }, { "epoch": 16.07730061349693, "grad_norm": 0.2443215250968933, "learning_rate": 4.978268694150811e-06, "loss": 0.4561368525028229, "step": 13103 }, { "epoch": 16.078527607361963, "grad_norm": 0.2651444375514984, "learning_rate": 4.975263427672469e-06, "loss": 0.569870114326477, "step": 13104 }, { "epoch": 16.079754601226995, "grad_norm": 0.2660331428050995, "learning_rate": 4.972258968334143e-06, "loss": 0.6021736860275269, "step": 13105 }, { "epoch": 16.080981595092023, "grad_norm": 0.28052952885627747, "learning_rate": 4.969255316256935e-06, "loss": 0.5541611909866333, "step": 13106 }, { "epoch": 16.082208588957055, "grad_norm": 0.24737954139709473, "learning_rate": 4.9662524715619245e-06, "loss": 0.5197997093200684, "step": 13107 }, { "epoch": 16.083435582822087, "grad_norm": 0.3370431959629059, "learning_rate": 4.963250434370134e-06, "loss": 0.7132781744003296, "step": 13108 }, { "epoch": 16.084662576687116, "grad_norm": 0.2928484082221985, "learning_rate": 4.960249204802569e-06, "loss": 0.5913158059120178, "step": 13109 }, { "epoch": 16.085889570552148, "grad_norm": 0.26269105076789856, "learning_rate": 4.957248782980203e-06, "loss": 0.583319902420044, "step": 13110 }, { "epoch": 16.08711656441718, "grad_norm": 0.2651742696762085, "learning_rate": 4.954249169023978e-06, "loss": 0.6551303267478943, "step": 13111 }, { "epoch": 16.088343558282208, "grad_norm": 0.29566675424575806, "learning_rate": 4.951250363054796e-06, "loss": 0.4853900969028473, "step": 13112 }, { "epoch": 16.08957055214724, "grad_norm": 0.2705538868904114, "learning_rate": 4.948252365193529e-06, "loss": 0.45553186535835266, "step": 13113 }, { "epoch": 16.09079754601227, "grad_norm": 0.25865715742111206, "learning_rate": 4.94525517556102e-06, "loss": 0.7069442272186279, "step": 13114 }, { "epoch": 16.0920245398773, "grad_norm": 0.2403561770915985, "learning_rate": 4.942258794278079e-06, "loss": 0.5768589973449707, "step": 13115 }, { "epoch": 16.093251533742333, "grad_norm": 0.2971912920475006, "learning_rate": 4.939263221465484e-06, "loss": 0.7352773547172546, "step": 13116 }, { "epoch": 16.09447852760736, "grad_norm": 0.2411186248064041, "learning_rate": 4.93626845724397e-06, "loss": 0.5274683237075806, "step": 13117 }, { "epoch": 16.095705521472393, "grad_norm": 0.2502848505973816, "learning_rate": 4.9332745017342475e-06, "loss": 0.6560822129249573, "step": 13118 }, { "epoch": 16.096932515337425, "grad_norm": 0.409450888633728, "learning_rate": 4.930281355056998e-06, "loss": 0.45870697498321533, "step": 13119 }, { "epoch": 16.098159509202453, "grad_norm": 0.2700274884700775, "learning_rate": 4.9272890173328675e-06, "loss": 0.522439181804657, "step": 13120 }, { "epoch": 16.099386503067485, "grad_norm": 0.288932204246521, "learning_rate": 4.9242974886824746e-06, "loss": 0.6392305493354797, "step": 13121 }, { "epoch": 16.100613496932514, "grad_norm": 0.26112663745880127, "learning_rate": 4.921306769226386e-06, "loss": 0.788783073425293, "step": 13122 }, { "epoch": 16.101840490797546, "grad_norm": 0.2800474464893341, "learning_rate": 4.918316859085153e-06, "loss": 0.8035540580749512, "step": 13123 }, { "epoch": 16.103067484662578, "grad_norm": 0.29269641637802124, "learning_rate": 4.915327758379296e-06, "loss": 0.7010263204574585, "step": 13124 }, { "epoch": 16.104294478527606, "grad_norm": 0.3049318492412567, "learning_rate": 4.912339467229291e-06, "loss": 0.6850440502166748, "step": 13125 }, { "epoch": 16.10552147239264, "grad_norm": 0.27186301350593567, "learning_rate": 4.909351985755597e-06, "loss": 0.743990421295166, "step": 13126 }, { "epoch": 16.10674846625767, "grad_norm": 0.24862919747829437, "learning_rate": 4.90636531407862e-06, "loss": 0.5589421987533569, "step": 13127 }, { "epoch": 16.1079754601227, "grad_norm": 0.24466146528720856, "learning_rate": 4.903379452318746e-06, "loss": 0.6041274666786194, "step": 13128 }, { "epoch": 16.10920245398773, "grad_norm": 0.2645622193813324, "learning_rate": 4.900394400596331e-06, "loss": 0.7062739133834839, "step": 13129 }, { "epoch": 16.11042944785276, "grad_norm": 0.24086056649684906, "learning_rate": 4.89741015903169e-06, "loss": 0.46289536356925964, "step": 13130 }, { "epoch": 16.11165644171779, "grad_norm": 0.2600914239883423, "learning_rate": 4.894426727745116e-06, "loss": 0.635460615158081, "step": 13131 }, { "epoch": 16.112883435582823, "grad_norm": 0.26358675956726074, "learning_rate": 4.891444106856854e-06, "loss": 0.5710529685020447, "step": 13132 }, { "epoch": 16.11411042944785, "grad_norm": 0.253864049911499, "learning_rate": 4.888462296487128e-06, "loss": 0.4841051399707794, "step": 13133 }, { "epoch": 16.115337423312884, "grad_norm": 0.2700788080692291, "learning_rate": 4.88548129675612e-06, "loss": 0.6963754892349243, "step": 13134 }, { "epoch": 16.116564417177916, "grad_norm": 0.2735915780067444, "learning_rate": 4.882501107784007e-06, "loss": 0.7655484676361084, "step": 13135 }, { "epoch": 16.117791411042944, "grad_norm": 0.2741968035697937, "learning_rate": 4.879521729690889e-06, "loss": 0.5286678671836853, "step": 13136 }, { "epoch": 16.119018404907976, "grad_norm": 0.27167972922325134, "learning_rate": 4.876543162596866e-06, "loss": 0.591541051864624, "step": 13137 }, { "epoch": 16.120245398773005, "grad_norm": 0.29270538687705994, "learning_rate": 4.8735654066219935e-06, "loss": 0.6303973197937012, "step": 13138 }, { "epoch": 16.121472392638037, "grad_norm": 0.2893234193325043, "learning_rate": 4.870588461886297e-06, "loss": 0.5766686201095581, "step": 13139 }, { "epoch": 16.12269938650307, "grad_norm": 0.2398088127374649, "learning_rate": 4.867612328509777e-06, "loss": 0.5517804622650146, "step": 13140 }, { "epoch": 16.123926380368097, "grad_norm": 0.3382578194141388, "learning_rate": 4.864637006612377e-06, "loss": 0.4417123794555664, "step": 13141 }, { "epoch": 16.12515337423313, "grad_norm": 0.29083165526390076, "learning_rate": 4.861662496314029e-06, "loss": 0.5894485116004944, "step": 13142 }, { "epoch": 16.12638036809816, "grad_norm": 0.28222060203552246, "learning_rate": 4.858688797734634e-06, "loss": 0.7196962237358093, "step": 13143 }, { "epoch": 16.12760736196319, "grad_norm": 0.23963573575019836, "learning_rate": 4.855715910994043e-06, "loss": 0.3818725347518921, "step": 13144 }, { "epoch": 16.12883435582822, "grad_norm": 0.31219708919525146, "learning_rate": 4.852743836212101e-06, "loss": 0.8090271949768066, "step": 13145 }, { "epoch": 16.13006134969325, "grad_norm": 0.28365078568458557, "learning_rate": 4.8497725735085874e-06, "loss": 0.5790428519248962, "step": 13146 }, { "epoch": 16.131288343558282, "grad_norm": 0.2942980229854584, "learning_rate": 4.846802123003269e-06, "loss": 0.6157505512237549, "step": 13147 }, { "epoch": 16.132515337423314, "grad_norm": 0.24072276055812836, "learning_rate": 4.84383248481588e-06, "loss": 0.710975706577301, "step": 13148 }, { "epoch": 16.133742331288342, "grad_norm": 0.2670470178127289, "learning_rate": 4.8408636590661136e-06, "loss": 0.4430112838745117, "step": 13149 }, { "epoch": 16.134969325153374, "grad_norm": 0.26567304134368896, "learning_rate": 4.837895645873647e-06, "loss": 0.6737350225448608, "step": 13150 }, { "epoch": 16.136196319018406, "grad_norm": 0.24781638383865356, "learning_rate": 4.834928445358095e-06, "loss": 0.5618760585784912, "step": 13151 }, { "epoch": 16.137423312883435, "grad_norm": 0.2597027122974396, "learning_rate": 4.831962057639064e-06, "loss": 0.32084476947784424, "step": 13152 }, { "epoch": 16.138650306748467, "grad_norm": 0.3139641284942627, "learning_rate": 4.82899648283612e-06, "loss": 0.5638801455497742, "step": 13153 }, { "epoch": 16.139877300613495, "grad_norm": 0.2607991099357605, "learning_rate": 4.826031721068802e-06, "loss": 0.4736795425415039, "step": 13154 }, { "epoch": 16.141104294478527, "grad_norm": 0.2910735607147217, "learning_rate": 4.8230677724566135e-06, "loss": 0.5457116961479187, "step": 13155 }, { "epoch": 16.14233128834356, "grad_norm": 0.27580368518829346, "learning_rate": 4.820104637118999e-06, "loss": 0.550837516784668, "step": 13156 }, { "epoch": 16.143558282208588, "grad_norm": 0.2515336275100708, "learning_rate": 4.817142315175424e-06, "loss": 0.6659282445907593, "step": 13157 }, { "epoch": 16.14478527607362, "grad_norm": 0.27303221821784973, "learning_rate": 4.814180806745275e-06, "loss": 0.6290711760520935, "step": 13158 }, { "epoch": 16.14601226993865, "grad_norm": 0.22520695626735687, "learning_rate": 4.811220111947934e-06, "loss": 0.5161122679710388, "step": 13159 }, { "epoch": 16.14723926380368, "grad_norm": 0.27080830931663513, "learning_rate": 4.8082602309027255e-06, "loss": 0.6321225166320801, "step": 13160 }, { "epoch": 16.148466257668712, "grad_norm": 0.3010316491127014, "learning_rate": 4.805301163728956e-06, "loss": 0.5394742488861084, "step": 13161 }, { "epoch": 16.14969325153374, "grad_norm": 0.2672591805458069, "learning_rate": 4.802342910545898e-06, "loss": 0.6142367124557495, "step": 13162 }, { "epoch": 16.150920245398773, "grad_norm": 0.24773141741752625, "learning_rate": 4.799385471472795e-06, "loss": 0.3138881325721741, "step": 13163 }, { "epoch": 16.152147239263805, "grad_norm": 0.276109904050827, "learning_rate": 4.796428846628848e-06, "loss": 0.7964004278182983, "step": 13164 }, { "epoch": 16.153374233128833, "grad_norm": 0.26131147146224976, "learning_rate": 4.793473036133239e-06, "loss": 0.5893199443817139, "step": 13165 }, { "epoch": 16.154601226993865, "grad_norm": 0.256214439868927, "learning_rate": 4.790518040105093e-06, "loss": 0.7451523542404175, "step": 13166 }, { "epoch": 16.155828220858897, "grad_norm": 0.25838518142700195, "learning_rate": 4.78756385866353e-06, "loss": 0.7196169495582581, "step": 13167 }, { "epoch": 16.157055214723925, "grad_norm": 0.2131279855966568, "learning_rate": 4.784610491927615e-06, "loss": 0.43673965334892273, "step": 13168 }, { "epoch": 16.158282208588957, "grad_norm": 0.28412795066833496, "learning_rate": 4.781657940016398e-06, "loss": 0.6144632697105408, "step": 13169 }, { "epoch": 16.15950920245399, "grad_norm": 0.25784993171691895, "learning_rate": 4.778706203048891e-06, "loss": 0.6508175134658813, "step": 13170 }, { "epoch": 16.160736196319018, "grad_norm": 0.2856801748275757, "learning_rate": 4.7757552811440566e-06, "loss": 0.5221280455589294, "step": 13171 }, { "epoch": 16.16196319018405, "grad_norm": 0.27448439598083496, "learning_rate": 4.772805174420844e-06, "loss": 0.7213163375854492, "step": 13172 }, { "epoch": 16.16319018404908, "grad_norm": 0.2653428018093109, "learning_rate": 4.769855882998167e-06, "loss": 0.6609193682670593, "step": 13173 }, { "epoch": 16.16441717791411, "grad_norm": 0.24832376837730408, "learning_rate": 4.766907406994903e-06, "loss": 0.5095094442367554, "step": 13174 }, { "epoch": 16.165644171779142, "grad_norm": 0.2930609881877899, "learning_rate": 4.7639597465299e-06, "loss": 0.6830508708953857, "step": 13175 }, { "epoch": 16.16687116564417, "grad_norm": 0.29002365469932556, "learning_rate": 4.761012901721957e-06, "loss": 0.5229792594909668, "step": 13176 }, { "epoch": 16.168098159509203, "grad_norm": 0.28105637431144714, "learning_rate": 4.7580668726898595e-06, "loss": 0.8450149297714233, "step": 13177 }, { "epoch": 16.169325153374235, "grad_norm": 0.30302131175994873, "learning_rate": 4.755121659552358e-06, "loss": 0.7883810997009277, "step": 13178 }, { "epoch": 16.170552147239263, "grad_norm": 0.2187521606683731, "learning_rate": 4.752177262428159e-06, "loss": 0.47563380002975464, "step": 13179 }, { "epoch": 16.171779141104295, "grad_norm": 0.27365240454673767, "learning_rate": 4.7492336814359466e-06, "loss": 0.542036235332489, "step": 13180 }, { "epoch": 16.173006134969324, "grad_norm": 0.31463176012039185, "learning_rate": 4.746290916694368e-06, "loss": 0.3805411458015442, "step": 13181 }, { "epoch": 16.174233128834356, "grad_norm": 0.25590071082115173, "learning_rate": 4.743348968322039e-06, "loss": 0.5801947116851807, "step": 13182 }, { "epoch": 16.175460122699388, "grad_norm": 0.24805468320846558, "learning_rate": 4.740407836437538e-06, "loss": 0.5966805219650269, "step": 13183 }, { "epoch": 16.176687116564416, "grad_norm": 0.27396267652511597, "learning_rate": 4.737467521159419e-06, "loss": 0.6823406219482422, "step": 13184 }, { "epoch": 16.177914110429448, "grad_norm": 0.2894447445869446, "learning_rate": 4.734528022606191e-06, "loss": 0.6406073570251465, "step": 13185 }, { "epoch": 16.17914110429448, "grad_norm": 0.2834652066230774, "learning_rate": 4.731589340896339e-06, "loss": 0.425714373588562, "step": 13186 }, { "epoch": 16.18036809815951, "grad_norm": 0.27729716897010803, "learning_rate": 4.72865147614831e-06, "loss": 0.5131691098213196, "step": 13187 }, { "epoch": 16.18159509202454, "grad_norm": 0.2799573242664337, "learning_rate": 4.725714428480527e-06, "loss": 0.5303713083267212, "step": 13188 }, { "epoch": 16.18282208588957, "grad_norm": 0.25994932651519775, "learning_rate": 4.722778198011376e-06, "loss": 0.4392682909965515, "step": 13189 }, { "epoch": 16.1840490797546, "grad_norm": 0.25617408752441406, "learning_rate": 4.719842784859199e-06, "loss": 0.6637478470802307, "step": 13190 }, { "epoch": 16.185276073619633, "grad_norm": 0.2382318377494812, "learning_rate": 4.716908189142319e-06, "loss": 0.5850287079811096, "step": 13191 }, { "epoch": 16.18650306748466, "grad_norm": 0.2544598877429962, "learning_rate": 4.713974410979019e-06, "loss": 0.5761870741844177, "step": 13192 }, { "epoch": 16.187730061349694, "grad_norm": 0.269155353307724, "learning_rate": 4.711041450487552e-06, "loss": 0.7380334138870239, "step": 13193 }, { "epoch": 16.188957055214726, "grad_norm": 0.28476956486701965, "learning_rate": 4.708109307786146e-06, "loss": 0.7908135652542114, "step": 13194 }, { "epoch": 16.190184049079754, "grad_norm": 0.22955350577831268, "learning_rate": 4.705177982992975e-06, "loss": 0.37272658944129944, "step": 13195 }, { "epoch": 16.191411042944786, "grad_norm": 0.26854830980300903, "learning_rate": 4.702247476226193e-06, "loss": 0.6922508478164673, "step": 13196 }, { "epoch": 16.192638036809814, "grad_norm": 0.26829928159713745, "learning_rate": 4.699317787603927e-06, "loss": 0.5979475975036621, "step": 13197 }, { "epoch": 16.193865030674846, "grad_norm": 0.27405938506126404, "learning_rate": 4.69638891724426e-06, "loss": 0.7163651585578918, "step": 13198 }, { "epoch": 16.19509202453988, "grad_norm": 0.46568503975868225, "learning_rate": 4.693460865265256e-06, "loss": 0.5337943434715271, "step": 13199 }, { "epoch": 16.196319018404907, "grad_norm": 0.21055616438388824, "learning_rate": 4.690533631784922e-06, "loss": 0.5126296281814575, "step": 13200 }, { "epoch": 16.19754601226994, "grad_norm": 0.28946927189826965, "learning_rate": 4.687607216921245e-06, "loss": 0.6053008437156677, "step": 13201 }, { "epoch": 16.19877300613497, "grad_norm": 0.2893691956996918, "learning_rate": 4.684681620792194e-06, "loss": 0.6773319840431213, "step": 13202 }, { "epoch": 16.2, "grad_norm": 0.27851349115371704, "learning_rate": 4.681756843515694e-06, "loss": 0.545873761177063, "step": 13203 }, { "epoch": 16.20122699386503, "grad_norm": 0.2847759425640106, "learning_rate": 4.678832885209622e-06, "loss": 0.5201053023338318, "step": 13204 }, { "epoch": 16.20245398773006, "grad_norm": 0.2738479971885681, "learning_rate": 4.6759097459918354e-06, "loss": 0.5916228890419006, "step": 13205 }, { "epoch": 16.20368098159509, "grad_norm": 0.25455865263938904, "learning_rate": 4.672987425980161e-06, "loss": 0.5692287683486938, "step": 13206 }, { "epoch": 16.204907975460124, "grad_norm": 0.245630145072937, "learning_rate": 4.6700659252923885e-06, "loss": 0.723638653755188, "step": 13207 }, { "epoch": 16.206134969325152, "grad_norm": 0.2689138352870941, "learning_rate": 4.6671452440462835e-06, "loss": 0.7015331983566284, "step": 13208 }, { "epoch": 16.207361963190184, "grad_norm": 0.28589722514152527, "learning_rate": 4.664225382359555e-06, "loss": 0.6418967843055725, "step": 13209 }, { "epoch": 16.208588957055216, "grad_norm": 0.23457679152488708, "learning_rate": 4.661306340349905e-06, "loss": 0.34803837537765503, "step": 13210 }, { "epoch": 16.209815950920245, "grad_norm": 0.3064819872379303, "learning_rate": 4.6583881181349845e-06, "loss": 0.5175788402557373, "step": 13211 }, { "epoch": 16.211042944785277, "grad_norm": 0.27626854181289673, "learning_rate": 4.6554707158324245e-06, "loss": 0.5262722373008728, "step": 13212 }, { "epoch": 16.212269938650305, "grad_norm": 0.26274681091308594, "learning_rate": 4.652554133559822e-06, "loss": 0.41854000091552734, "step": 13213 }, { "epoch": 16.213496932515337, "grad_norm": 0.3591626286506653, "learning_rate": 4.649638371434725e-06, "loss": 0.710054874420166, "step": 13214 }, { "epoch": 16.21472392638037, "grad_norm": 0.26058652997016907, "learning_rate": 4.646723429574662e-06, "loss": 0.6082108020782471, "step": 13215 }, { "epoch": 16.215950920245398, "grad_norm": 0.25370755791664124, "learning_rate": 4.64380930809713e-06, "loss": 0.4492613673210144, "step": 13216 }, { "epoch": 16.21717791411043, "grad_norm": 0.29002004861831665, "learning_rate": 4.640896007119585e-06, "loss": 0.4923219084739685, "step": 13217 }, { "epoch": 16.21840490797546, "grad_norm": 0.2603412866592407, "learning_rate": 4.6379835267594625e-06, "loss": 0.6093648076057434, "step": 13218 }, { "epoch": 16.21963190184049, "grad_norm": 0.24764655530452728, "learning_rate": 4.635071867134144e-06, "loss": 0.5922955274581909, "step": 13219 }, { "epoch": 16.220858895705522, "grad_norm": 0.2971126437187195, "learning_rate": 4.632161028360999e-06, "loss": 0.5750362873077393, "step": 13220 }, { "epoch": 16.22208588957055, "grad_norm": 0.24872688949108124, "learning_rate": 4.6292510105573475e-06, "loss": 0.7498136758804321, "step": 13221 }, { "epoch": 16.223312883435582, "grad_norm": 0.23495742678642273, "learning_rate": 4.6263418138404905e-06, "loss": 0.5556060075759888, "step": 13222 }, { "epoch": 16.224539877300614, "grad_norm": 0.24937911331653595, "learning_rate": 4.6234334383276874e-06, "loss": 0.5838548541069031, "step": 13223 }, { "epoch": 16.225766871165643, "grad_norm": 0.2405858188867569, "learning_rate": 4.620525884136165e-06, "loss": 0.5434048175811768, "step": 13224 }, { "epoch": 16.226993865030675, "grad_norm": 0.2521558701992035, "learning_rate": 4.617619151383123e-06, "loss": 0.6701222658157349, "step": 13225 }, { "epoch": 16.228220858895707, "grad_norm": 0.24123108386993408, "learning_rate": 4.614713240185719e-06, "loss": 0.5069430470466614, "step": 13226 }, { "epoch": 16.229447852760735, "grad_norm": 0.26898515224456787, "learning_rate": 4.61180815066109e-06, "loss": 0.5964100956916809, "step": 13227 }, { "epoch": 16.230674846625767, "grad_norm": 0.3032064437866211, "learning_rate": 4.608903882926319e-06, "loss": 0.5420168042182922, "step": 13228 }, { "epoch": 16.2319018404908, "grad_norm": 0.2433241754770279, "learning_rate": 4.606000437098476e-06, "loss": 0.5716078281402588, "step": 13229 }, { "epoch": 16.233128834355828, "grad_norm": 0.3074144124984741, "learning_rate": 4.603097813294591e-06, "loss": 0.6172566413879395, "step": 13230 }, { "epoch": 16.23435582822086, "grad_norm": 0.250888854265213, "learning_rate": 4.600196011631655e-06, "loss": 0.6087924242019653, "step": 13231 }, { "epoch": 16.235582822085888, "grad_norm": 0.2733483910560608, "learning_rate": 4.597295032226645e-06, "loss": 0.5378131866455078, "step": 13232 }, { "epoch": 16.23680981595092, "grad_norm": 0.26539894938468933, "learning_rate": 4.594394875196476e-06, "loss": 0.38937902450561523, "step": 13233 }, { "epoch": 16.238036809815952, "grad_norm": 0.2345060408115387, "learning_rate": 4.59149554065805e-06, "loss": 0.5817970633506775, "step": 13234 }, { "epoch": 16.23926380368098, "grad_norm": 0.25764191150665283, "learning_rate": 4.588597028728233e-06, "loss": 0.40691524744033813, "step": 13235 }, { "epoch": 16.240490797546013, "grad_norm": 0.26516881585121155, "learning_rate": 4.585699339523855e-06, "loss": 0.8144074082374573, "step": 13236 }, { "epoch": 16.241717791411045, "grad_norm": 0.25674954056739807, "learning_rate": 4.5828024731617184e-06, "loss": 0.6455665826797485, "step": 13237 }, { "epoch": 16.242944785276073, "grad_norm": 0.2434859424829483, "learning_rate": 4.579906429758577e-06, "loss": 0.586162269115448, "step": 13238 }, { "epoch": 16.244171779141105, "grad_norm": 0.3095175325870514, "learning_rate": 4.577011209431164e-06, "loss": 0.650199830532074, "step": 13239 }, { "epoch": 16.245398773006134, "grad_norm": 0.23989199101924896, "learning_rate": 4.574116812296183e-06, "loss": 0.5695067644119263, "step": 13240 }, { "epoch": 16.246625766871166, "grad_norm": 0.27976739406585693, "learning_rate": 4.571223238470298e-06, "loss": 0.5434484481811523, "step": 13241 }, { "epoch": 16.247852760736198, "grad_norm": 0.25708526372909546, "learning_rate": 4.568330488070147e-06, "loss": 0.6144218444824219, "step": 13242 }, { "epoch": 16.249079754601226, "grad_norm": 0.2699223458766937, "learning_rate": 4.565438561212312e-06, "loss": 0.6801612973213196, "step": 13243 }, { "epoch": 16.250306748466258, "grad_norm": 0.2853533923625946, "learning_rate": 4.5625474580133695e-06, "loss": 0.5621583461761475, "step": 13244 }, { "epoch": 16.25153374233129, "grad_norm": 0.25511428713798523, "learning_rate": 4.559657178589843e-06, "loss": 0.6514368653297424, "step": 13245 }, { "epoch": 16.25276073619632, "grad_norm": 0.2834816575050354, "learning_rate": 4.55676772305825e-06, "loss": 0.6605062484741211, "step": 13246 }, { "epoch": 16.25398773006135, "grad_norm": 0.2242036908864975, "learning_rate": 4.553879091535038e-06, "loss": 0.4322168231010437, "step": 13247 }, { "epoch": 16.25521472392638, "grad_norm": 0.26797494292259216, "learning_rate": 4.550991284136644e-06, "loss": 0.6573880910873413, "step": 13248 }, { "epoch": 16.25644171779141, "grad_norm": 0.257305383682251, "learning_rate": 4.5481043009794696e-06, "loss": 0.612292468547821, "step": 13249 }, { "epoch": 16.257668711656443, "grad_norm": 0.2720193862915039, "learning_rate": 4.545218142179883e-06, "loss": 0.629028856754303, "step": 13250 }, { "epoch": 16.25889570552147, "grad_norm": 0.2993520200252533, "learning_rate": 4.542332807854216e-06, "loss": 0.5095069408416748, "step": 13251 }, { "epoch": 16.260122699386503, "grad_norm": 0.2738068401813507, "learning_rate": 4.539448298118765e-06, "loss": 0.5855487585067749, "step": 13252 }, { "epoch": 16.261349693251535, "grad_norm": 0.2473188042640686, "learning_rate": 4.536564613089794e-06, "loss": 0.609686017036438, "step": 13253 }, { "epoch": 16.262576687116564, "grad_norm": 0.28234195709228516, "learning_rate": 4.533681752883542e-06, "loss": 0.6744356155395508, "step": 13254 }, { "epoch": 16.263803680981596, "grad_norm": 0.2526891529560089, "learning_rate": 4.530799717616205e-06, "loss": 0.6174958944320679, "step": 13255 }, { "epoch": 16.265030674846624, "grad_norm": 0.27980300784111023, "learning_rate": 4.527918507403958e-06, "loss": 0.6797261834144592, "step": 13256 }, { "epoch": 16.266257668711656, "grad_norm": 0.2733054757118225, "learning_rate": 4.525038122362924e-06, "loss": 0.4258878231048584, "step": 13257 }, { "epoch": 16.26748466257669, "grad_norm": 0.2629052400588989, "learning_rate": 4.522158562609205e-06, "loss": 0.5876846313476562, "step": 13258 }, { "epoch": 16.268711656441717, "grad_norm": 0.25630512833595276, "learning_rate": 4.519279828258869e-06, "loss": 0.5347350835800171, "step": 13259 }, { "epoch": 16.26993865030675, "grad_norm": 0.2271547168493271, "learning_rate": 4.516401919427951e-06, "loss": 0.5061699151992798, "step": 13260 }, { "epoch": 16.27116564417178, "grad_norm": 0.2658248841762543, "learning_rate": 4.513524836232458e-06, "loss": 0.6206983327865601, "step": 13261 }, { "epoch": 16.27239263803681, "grad_norm": 0.3125603199005127, "learning_rate": 4.51064857878834e-06, "loss": 0.6152169704437256, "step": 13262 }, { "epoch": 16.27361963190184, "grad_norm": 0.27204352617263794, "learning_rate": 4.507773147211544e-06, "loss": 0.5869128704071045, "step": 13263 }, { "epoch": 16.27484662576687, "grad_norm": 0.25714296102523804, "learning_rate": 4.5048985416179634e-06, "loss": 0.404366135597229, "step": 13264 }, { "epoch": 16.2760736196319, "grad_norm": 0.28522881865501404, "learning_rate": 4.502024762123472e-06, "loss": 0.6333240270614624, "step": 13265 }, { "epoch": 16.277300613496934, "grad_norm": 0.2809908390045166, "learning_rate": 4.499151808843899e-06, "loss": 0.5930309891700745, "step": 13266 }, { "epoch": 16.278527607361962, "grad_norm": 0.2909790277481079, "learning_rate": 4.496279681895046e-06, "loss": 0.7207880020141602, "step": 13267 }, { "epoch": 16.279754601226994, "grad_norm": 0.23641055822372437, "learning_rate": 4.49340838139268e-06, "loss": 0.46466630697250366, "step": 13268 }, { "epoch": 16.280981595092026, "grad_norm": 0.3229680359363556, "learning_rate": 4.490537907452538e-06, "loss": 0.5813896059989929, "step": 13269 }, { "epoch": 16.282208588957054, "grad_norm": 0.2439122200012207, "learning_rate": 4.487668260190317e-06, "loss": 0.6035223603248596, "step": 13270 }, { "epoch": 16.283435582822086, "grad_norm": 0.2680279016494751, "learning_rate": 4.48479943972169e-06, "loss": 0.5062393546104431, "step": 13271 }, { "epoch": 16.284662576687115, "grad_norm": 0.2536420226097107, "learning_rate": 4.481931446162282e-06, "loss": 0.5383325219154358, "step": 13272 }, { "epoch": 16.285889570552147, "grad_norm": 0.234199658036232, "learning_rate": 4.4790642796276986e-06, "loss": 0.45381516218185425, "step": 13273 }, { "epoch": 16.28711656441718, "grad_norm": 0.27078890800476074, "learning_rate": 4.4761979402335034e-06, "loss": 0.49841147661209106, "step": 13274 }, { "epoch": 16.288343558282207, "grad_norm": 0.2502697706222534, "learning_rate": 4.4733324280952365e-06, "loss": 0.5513570308685303, "step": 13275 }, { "epoch": 16.28957055214724, "grad_norm": 0.25519081950187683, "learning_rate": 4.470467743328399e-06, "loss": 0.4748937785625458, "step": 13276 }, { "epoch": 16.29079754601227, "grad_norm": 0.3046509325504303, "learning_rate": 4.467603886048452e-06, "loss": 0.7614137530326843, "step": 13277 }, { "epoch": 16.2920245398773, "grad_norm": 0.26422688364982605, "learning_rate": 4.464740856370831e-06, "loss": 0.525684654712677, "step": 13278 }, { "epoch": 16.293251533742332, "grad_norm": 0.23026792705059052, "learning_rate": 4.461878654410937e-06, "loss": 0.5349068641662598, "step": 13279 }, { "epoch": 16.29447852760736, "grad_norm": 0.24230101704597473, "learning_rate": 4.459017280284136e-06, "loss": 0.5063974857330322, "step": 13280 }, { "epoch": 16.295705521472392, "grad_norm": 0.2706296741962433, "learning_rate": 4.456156734105774e-06, "loss": 0.6558905839920044, "step": 13281 }, { "epoch": 16.296932515337424, "grad_norm": 0.26920396089553833, "learning_rate": 4.453297015991131e-06, "loss": 0.7345442771911621, "step": 13282 }, { "epoch": 16.298159509202453, "grad_norm": 0.23520886898040771, "learning_rate": 4.450438126055487e-06, "loss": 0.6086827516555786, "step": 13283 }, { "epoch": 16.299386503067485, "grad_norm": 0.2530915141105652, "learning_rate": 4.447580064414072e-06, "loss": 0.6675788164138794, "step": 13284 }, { "epoch": 16.300613496932517, "grad_norm": 0.2812601625919342, "learning_rate": 4.444722831182088e-06, "loss": 0.7057855129241943, "step": 13285 }, { "epoch": 16.301840490797545, "grad_norm": 0.2594659626483917, "learning_rate": 4.441866426474706e-06, "loss": 0.5705575346946716, "step": 13286 }, { "epoch": 16.303067484662577, "grad_norm": 0.2782536447048187, "learning_rate": 4.43901085040705e-06, "loss": 0.3650239109992981, "step": 13287 }, { "epoch": 16.30429447852761, "grad_norm": 0.2798900902271271, "learning_rate": 4.4361561030942245e-06, "loss": 0.6121083498001099, "step": 13288 }, { "epoch": 16.305521472392638, "grad_norm": 0.2620481252670288, "learning_rate": 4.4333021846512945e-06, "loss": 0.6883023381233215, "step": 13289 }, { "epoch": 16.30674846625767, "grad_norm": 0.235055074095726, "learning_rate": 4.430449095193298e-06, "loss": 0.368591845035553, "step": 13290 }, { "epoch": 16.307975460122698, "grad_norm": 0.28983405232429504, "learning_rate": 4.4275968348352295e-06, "loss": 0.6744905114173889, "step": 13291 }, { "epoch": 16.30920245398773, "grad_norm": 0.24102449417114258, "learning_rate": 4.42474540369206e-06, "loss": 0.476156622171402, "step": 13292 }, { "epoch": 16.310429447852762, "grad_norm": 0.26800981163978577, "learning_rate": 4.421894801878723e-06, "loss": 0.5201377272605896, "step": 13293 }, { "epoch": 16.31165644171779, "grad_norm": 0.22950243949890137, "learning_rate": 4.419045029510113e-06, "loss": 0.5924952030181885, "step": 13294 }, { "epoch": 16.312883435582823, "grad_norm": 0.26236552000045776, "learning_rate": 4.416196086701105e-06, "loss": 0.5750407576560974, "step": 13295 }, { "epoch": 16.314110429447855, "grad_norm": 0.2786880433559418, "learning_rate": 4.413347973566523e-06, "loss": 0.6254115700721741, "step": 13296 }, { "epoch": 16.315337423312883, "grad_norm": 0.2444990873336792, "learning_rate": 4.410500690221167e-06, "loss": 0.48151805996894836, "step": 13297 }, { "epoch": 16.316564417177915, "grad_norm": 0.29292818903923035, "learning_rate": 4.407654236779804e-06, "loss": 0.5759745836257935, "step": 13298 }, { "epoch": 16.317791411042943, "grad_norm": 0.4311799108982086, "learning_rate": 4.40480861335717e-06, "loss": 0.6061057448387146, "step": 13299 }, { "epoch": 16.319018404907975, "grad_norm": 0.24517597258090973, "learning_rate": 4.401963820067964e-06, "loss": 0.555873453617096, "step": 13300 }, { "epoch": 16.320245398773007, "grad_norm": 0.3157128691673279, "learning_rate": 4.399119857026845e-06, "loss": 0.5469267964363098, "step": 13301 }, { "epoch": 16.321472392638036, "grad_norm": 0.23994699120521545, "learning_rate": 4.3962767243484495e-06, "loss": 0.6229742169380188, "step": 13302 }, { "epoch": 16.322699386503068, "grad_norm": 0.25294336676597595, "learning_rate": 4.393434422147377e-06, "loss": 0.5897321701049805, "step": 13303 }, { "epoch": 16.3239263803681, "grad_norm": 0.26979780197143555, "learning_rate": 4.390592950538189e-06, "loss": 0.6725951433181763, "step": 13304 }, { "epoch": 16.32515337423313, "grad_norm": 0.2991276979446411, "learning_rate": 4.387752309635426e-06, "loss": 0.4308876097202301, "step": 13305 }, { "epoch": 16.32638036809816, "grad_norm": 0.30408018827438354, "learning_rate": 4.384912499553573e-06, "loss": 0.6699692606925964, "step": 13306 }, { "epoch": 16.32760736196319, "grad_norm": 0.28968527913093567, "learning_rate": 4.382073520407101e-06, "loss": 0.5594722032546997, "step": 13307 }, { "epoch": 16.32883435582822, "grad_norm": 0.27474161982536316, "learning_rate": 4.379235372310442e-06, "loss": 0.6353648900985718, "step": 13308 }, { "epoch": 16.330061349693253, "grad_norm": 0.2334328293800354, "learning_rate": 4.376398055377992e-06, "loss": 0.3060716688632965, "step": 13309 }, { "epoch": 16.33128834355828, "grad_norm": 0.28736889362335205, "learning_rate": 4.3735615697241214e-06, "loss": 0.4233476221561432, "step": 13310 }, { "epoch": 16.332515337423313, "grad_norm": 0.2424619048833847, "learning_rate": 4.3707259154631504e-06, "loss": 0.3966156244277954, "step": 13311 }, { "epoch": 16.333742331288345, "grad_norm": 0.23782390356063843, "learning_rate": 4.367891092709375e-06, "loss": 0.5764304399490356, "step": 13312 }, { "epoch": 16.334969325153374, "grad_norm": 0.26481691002845764, "learning_rate": 4.365057101577072e-06, "loss": 0.6435714960098267, "step": 13313 }, { "epoch": 16.336196319018406, "grad_norm": 0.25812074542045593, "learning_rate": 4.3622239421804715e-06, "loss": 0.6116691827774048, "step": 13314 }, { "epoch": 16.337423312883434, "grad_norm": 0.2578902840614319, "learning_rate": 4.359391614633754e-06, "loss": 0.5610167384147644, "step": 13315 }, { "epoch": 16.338650306748466, "grad_norm": 0.2671012580394745, "learning_rate": 4.356560119051093e-06, "loss": 0.7131375074386597, "step": 13316 }, { "epoch": 16.339877300613498, "grad_norm": 0.2858607769012451, "learning_rate": 4.353729455546615e-06, "loss": 0.613631546497345, "step": 13317 }, { "epoch": 16.341104294478527, "grad_norm": 0.2606120705604553, "learning_rate": 4.350899624234419e-06, "loss": 0.5065590739250183, "step": 13318 }, { "epoch": 16.34233128834356, "grad_norm": 0.2992267906665802, "learning_rate": 4.348070625228576e-06, "loss": 0.7321280837059021, "step": 13319 }, { "epoch": 16.34355828220859, "grad_norm": 0.269534170627594, "learning_rate": 4.345242458643095e-06, "loss": 0.4820288419723511, "step": 13320 }, { "epoch": 16.34478527607362, "grad_norm": 0.25759586691856384, "learning_rate": 4.342415124591981e-06, "loss": 0.7140194177627563, "step": 13321 }, { "epoch": 16.34601226993865, "grad_norm": 0.237970769405365, "learning_rate": 4.339588623189197e-06, "loss": 0.513908863067627, "step": 13322 }, { "epoch": 16.34723926380368, "grad_norm": 0.2996392548084259, "learning_rate": 4.3367629545486705e-06, "loss": 0.771621823310852, "step": 13323 }, { "epoch": 16.34846625766871, "grad_norm": 0.28651612997055054, "learning_rate": 4.3339381187843005e-06, "loss": 0.5779035091400146, "step": 13324 }, { "epoch": 16.349693251533743, "grad_norm": 0.2705448865890503, "learning_rate": 4.331114116009938e-06, "loss": 0.45130228996276855, "step": 13325 }, { "epoch": 16.350920245398772, "grad_norm": 0.24270370602607727, "learning_rate": 4.328290946339417e-06, "loss": 0.6736437082290649, "step": 13326 }, { "epoch": 16.352147239263804, "grad_norm": 0.2757456302642822, "learning_rate": 4.3254686098865295e-06, "loss": 0.5653907656669617, "step": 13327 }, { "epoch": 16.353374233128836, "grad_norm": 0.2824174761772156, "learning_rate": 4.322647106765038e-06, "loss": 0.6830002665519714, "step": 13328 }, { "epoch": 16.354601226993864, "grad_norm": 0.259770005941391, "learning_rate": 4.319826437088672e-06, "loss": 0.530710756778717, "step": 13329 }, { "epoch": 16.355828220858896, "grad_norm": 0.23922930657863617, "learning_rate": 4.317006600971115e-06, "loss": 0.37981969118118286, "step": 13330 }, { "epoch": 16.357055214723925, "grad_norm": 0.32526251673698425, "learning_rate": 4.314187598526034e-06, "loss": 0.7703301906585693, "step": 13331 }, { "epoch": 16.358282208588957, "grad_norm": 0.2806652784347534, "learning_rate": 4.311369429867052e-06, "loss": 0.7434009313583374, "step": 13332 }, { "epoch": 16.35950920245399, "grad_norm": 0.28032243251800537, "learning_rate": 4.308552095107762e-06, "loss": 0.5259553790092468, "step": 13333 }, { "epoch": 16.360736196319017, "grad_norm": 0.271475225687027, "learning_rate": 4.305735594361723e-06, "loss": 0.5726152658462524, "step": 13334 }, { "epoch": 16.36196319018405, "grad_norm": 0.2869928479194641, "learning_rate": 4.302919927742461e-06, "loss": 0.6598101258277893, "step": 13335 }, { "epoch": 16.36319018404908, "grad_norm": 0.2597189247608185, "learning_rate": 4.300105095363466e-06, "loss": 0.5569853782653809, "step": 13336 }, { "epoch": 16.36441717791411, "grad_norm": 0.2527618408203125, "learning_rate": 4.297291097338196e-06, "loss": 0.5918699502944946, "step": 13337 }, { "epoch": 16.36564417177914, "grad_norm": 0.2805441915988922, "learning_rate": 4.2944779337800865e-06, "loss": 0.46389859914779663, "step": 13338 }, { "epoch": 16.36687116564417, "grad_norm": 0.35142773389816284, "learning_rate": 4.291665604802505e-06, "loss": 0.5458855628967285, "step": 13339 }, { "epoch": 16.368098159509202, "grad_norm": 0.2797529399394989, "learning_rate": 4.288854110518825e-06, "loss": 0.7147852182388306, "step": 13340 }, { "epoch": 16.369325153374234, "grad_norm": 0.2094280868768692, "learning_rate": 4.2860434510423655e-06, "loss": 0.34759971499443054, "step": 13341 }, { "epoch": 16.370552147239263, "grad_norm": 0.2544613480567932, "learning_rate": 4.283233626486416e-06, "loss": 0.38460299372673035, "step": 13342 }, { "epoch": 16.371779141104295, "grad_norm": 0.26101139187812805, "learning_rate": 4.280424636964239e-06, "loss": 0.477523535490036, "step": 13343 }, { "epoch": 16.373006134969327, "grad_norm": 0.22312501072883606, "learning_rate": 4.277616482589044e-06, "loss": 0.3388088345527649, "step": 13344 }, { "epoch": 16.374233128834355, "grad_norm": 0.2512829005718231, "learning_rate": 4.274809163474028e-06, "loss": 0.5950487852096558, "step": 13345 }, { "epoch": 16.375460122699387, "grad_norm": 0.3198610544204712, "learning_rate": 4.272002679732343e-06, "loss": 0.48951566219329834, "step": 13346 }, { "epoch": 16.376687116564415, "grad_norm": 0.31457027792930603, "learning_rate": 4.269197031477112e-06, "loss": 0.5846658945083618, "step": 13347 }, { "epoch": 16.377914110429447, "grad_norm": 0.26134762167930603, "learning_rate": 4.26639221882143e-06, "loss": 0.5313738584518433, "step": 13348 }, { "epoch": 16.37914110429448, "grad_norm": 0.2685260772705078, "learning_rate": 4.263588241878336e-06, "loss": 0.6785080432891846, "step": 13349 }, { "epoch": 16.380368098159508, "grad_norm": 0.27047011256217957, "learning_rate": 4.260785100760859e-06, "loss": 0.6503673791885376, "step": 13350 }, { "epoch": 16.38159509202454, "grad_norm": 0.22431674599647522, "learning_rate": 4.257982795581983e-06, "loss": 0.42191779613494873, "step": 13351 }, { "epoch": 16.382822085889572, "grad_norm": 0.2553509771823883, "learning_rate": 4.255181326454663e-06, "loss": 0.7838529348373413, "step": 13352 }, { "epoch": 16.3840490797546, "grad_norm": 0.26558345556259155, "learning_rate": 4.252380693491825e-06, "loss": 0.6579726934432983, "step": 13353 }, { "epoch": 16.385276073619632, "grad_norm": 0.3006638288497925, "learning_rate": 4.249580896806341e-06, "loss": 0.5513029098510742, "step": 13354 }, { "epoch": 16.38650306748466, "grad_norm": 0.2821108400821686, "learning_rate": 4.246781936511069e-06, "loss": 0.50411057472229, "step": 13355 }, { "epoch": 16.387730061349693, "grad_norm": 0.2343277633190155, "learning_rate": 4.243983812718819e-06, "loss": 0.4428092837333679, "step": 13356 }, { "epoch": 16.388957055214725, "grad_norm": 0.2414676994085312, "learning_rate": 4.241186525542395e-06, "loss": 0.3781315088272095, "step": 13357 }, { "epoch": 16.390184049079753, "grad_norm": 0.27212196588516235, "learning_rate": 4.238390075094534e-06, "loss": 0.5532770156860352, "step": 13358 }, { "epoch": 16.391411042944785, "grad_norm": 0.2620725631713867, "learning_rate": 4.235594461487952e-06, "loss": 0.6142215728759766, "step": 13359 }, { "epoch": 16.392638036809817, "grad_norm": 0.3025754690170288, "learning_rate": 4.232799684835337e-06, "loss": 0.46810686588287354, "step": 13360 }, { "epoch": 16.393865030674846, "grad_norm": 0.2802875339984894, "learning_rate": 4.230005745249335e-06, "loss": 0.6000072360038757, "step": 13361 }, { "epoch": 16.395092024539878, "grad_norm": 0.26982107758522034, "learning_rate": 4.227212642842573e-06, "loss": 0.7091546654701233, "step": 13362 }, { "epoch": 16.39631901840491, "grad_norm": 0.2467152625322342, "learning_rate": 4.224420377727614e-06, "loss": 0.5401327610015869, "step": 13363 }, { "epoch": 16.397546012269938, "grad_norm": 0.2861972451210022, "learning_rate": 4.2216289500170175e-06, "loss": 0.5623261332511902, "step": 13364 }, { "epoch": 16.39877300613497, "grad_norm": 0.23508138954639435, "learning_rate": 4.218838359823296e-06, "loss": 0.4473956823348999, "step": 13365 }, { "epoch": 16.4, "grad_norm": 0.26885947585105896, "learning_rate": 4.216048607258929e-06, "loss": 0.6414278745651245, "step": 13366 }, { "epoch": 16.40122699386503, "grad_norm": 0.29423317313194275, "learning_rate": 4.213259692436367e-06, "loss": 0.5475013852119446, "step": 13367 }, { "epoch": 16.402453987730063, "grad_norm": 0.260845422744751, "learning_rate": 4.210471615468029e-06, "loss": 0.5661296844482422, "step": 13368 }, { "epoch": 16.40368098159509, "grad_norm": 0.28073933720588684, "learning_rate": 4.207684376466275e-06, "loss": 0.808427095413208, "step": 13369 }, { "epoch": 16.404907975460123, "grad_norm": 0.25166353583335876, "learning_rate": 4.2048979755434675e-06, "loss": 0.677880048751831, "step": 13370 }, { "epoch": 16.406134969325155, "grad_norm": 0.28639110922813416, "learning_rate": 4.2021124128119096e-06, "loss": 0.44624748826026917, "step": 13371 }, { "epoch": 16.407361963190183, "grad_norm": 0.2516964077949524, "learning_rate": 4.199327688383883e-06, "loss": 0.43115800619125366, "step": 13372 }, { "epoch": 16.408588957055215, "grad_norm": 0.25553062558174133, "learning_rate": 4.1965438023716415e-06, "loss": 0.3531637191772461, "step": 13373 }, { "epoch": 16.409815950920244, "grad_norm": 0.22506451606750488, "learning_rate": 4.193760754887377e-06, "loss": 0.5543330907821655, "step": 13374 }, { "epoch": 16.411042944785276, "grad_norm": 0.3032040596008301, "learning_rate": 4.190978546043276e-06, "loss": 0.3590158522129059, "step": 13375 }, { "epoch": 16.412269938650308, "grad_norm": 0.2831694483757019, "learning_rate": 4.18819717595148e-06, "loss": 0.40508759021759033, "step": 13376 }, { "epoch": 16.413496932515336, "grad_norm": 0.26427003741264343, "learning_rate": 4.185416644724099e-06, "loss": 0.6606460809707642, "step": 13377 }, { "epoch": 16.41472392638037, "grad_norm": 0.33334052562713623, "learning_rate": 4.182636952473215e-06, "loss": 0.5703439712524414, "step": 13378 }, { "epoch": 16.4159509202454, "grad_norm": 0.30359992384910583, "learning_rate": 4.1798580993108485e-06, "loss": 0.8621118068695068, "step": 13379 }, { "epoch": 16.41717791411043, "grad_norm": 0.6548107266426086, "learning_rate": 4.177080085349027e-06, "loss": 0.5615822076797485, "step": 13380 }, { "epoch": 16.41840490797546, "grad_norm": 0.2778775691986084, "learning_rate": 4.174302910699721e-06, "loss": 0.7408992052078247, "step": 13381 }, { "epoch": 16.41963190184049, "grad_norm": 0.2583412528038025, "learning_rate": 4.171526575474874e-06, "loss": 0.5068303942680359, "step": 13382 }, { "epoch": 16.42085889570552, "grad_norm": 0.2509898841381073, "learning_rate": 4.1687510797863785e-06, "loss": 0.4926745593547821, "step": 13383 }, { "epoch": 16.422085889570553, "grad_norm": 0.29996153712272644, "learning_rate": 4.1659764237461125e-06, "loss": 0.43143850564956665, "step": 13384 }, { "epoch": 16.42331288343558, "grad_norm": 0.23654234409332275, "learning_rate": 4.163202607465919e-06, "loss": 0.5020192861557007, "step": 13385 }, { "epoch": 16.424539877300614, "grad_norm": 0.28411203622817993, "learning_rate": 4.1604296310575995e-06, "loss": 0.6138793230056763, "step": 13386 }, { "epoch": 16.425766871165646, "grad_norm": 0.26792648434638977, "learning_rate": 4.157657494632933e-06, "loss": 0.42295002937316895, "step": 13387 }, { "epoch": 16.426993865030674, "grad_norm": 0.29797348380088806, "learning_rate": 4.15488619830364e-06, "loss": 0.7879992723464966, "step": 13388 }, { "epoch": 16.428220858895706, "grad_norm": 0.23776346445083618, "learning_rate": 4.152115742181434e-06, "loss": 0.6064922213554382, "step": 13389 }, { "epoch": 16.429447852760735, "grad_norm": 0.31760767102241516, "learning_rate": 4.149346126377978e-06, "loss": 0.5093121528625488, "step": 13390 }, { "epoch": 16.430674846625767, "grad_norm": 0.26244282722473145, "learning_rate": 4.146577351004919e-06, "loss": 0.6909439563751221, "step": 13391 }, { "epoch": 16.4319018404908, "grad_norm": 0.23974914848804474, "learning_rate": 4.143809416173852e-06, "loss": 0.6135148406028748, "step": 13392 }, { "epoch": 16.433128834355827, "grad_norm": 0.2982504665851593, "learning_rate": 4.141042321996338e-06, "loss": 0.6108731627464294, "step": 13393 }, { "epoch": 16.43435582822086, "grad_norm": 0.26665908098220825, "learning_rate": 4.138276068583918e-06, "loss": 0.7008980512619019, "step": 13394 }, { "epoch": 16.43558282208589, "grad_norm": 0.2646900415420532, "learning_rate": 4.135510656048089e-06, "loss": 0.6589130163192749, "step": 13395 }, { "epoch": 16.43680981595092, "grad_norm": 0.2514060437679291, "learning_rate": 4.132746084500319e-06, "loss": 0.6376365423202515, "step": 13396 }, { "epoch": 16.43803680981595, "grad_norm": 0.2564666271209717, "learning_rate": 4.129982354052045e-06, "loss": 0.6712174415588379, "step": 13397 }, { "epoch": 16.43926380368098, "grad_norm": 0.27303263545036316, "learning_rate": 4.127219464814652e-06, "loss": 0.5700410008430481, "step": 13398 }, { "epoch": 16.440490797546012, "grad_norm": 0.25012513995170593, "learning_rate": 4.124457416899513e-06, "loss": 0.5076581835746765, "step": 13399 }, { "epoch": 16.441717791411044, "grad_norm": 0.2746155560016632, "learning_rate": 4.121696210417955e-06, "loss": 0.530373752117157, "step": 13400 }, { "epoch": 16.442944785276072, "grad_norm": 0.2735922038555145, "learning_rate": 4.118935845481278e-06, "loss": 0.808706521987915, "step": 13401 }, { "epoch": 16.444171779141104, "grad_norm": 0.26416948437690735, "learning_rate": 4.116176322200741e-06, "loss": 0.5659494996070862, "step": 13402 }, { "epoch": 16.445398773006136, "grad_norm": 0.29343947768211365, "learning_rate": 4.1134176406875755e-06, "loss": 0.6505990028381348, "step": 13403 }, { "epoch": 16.446625766871165, "grad_norm": 0.28647351264953613, "learning_rate": 4.110659801052971e-06, "loss": 0.5605823397636414, "step": 13404 }, { "epoch": 16.447852760736197, "grad_norm": 0.3571493625640869, "learning_rate": 4.107902803408096e-06, "loss": 0.879448652267456, "step": 13405 }, { "epoch": 16.449079754601225, "grad_norm": 0.2630905210971832, "learning_rate": 4.105146647864075e-06, "loss": 0.5691258311271667, "step": 13406 }, { "epoch": 16.450306748466257, "grad_norm": 0.25223666429519653, "learning_rate": 4.102391334531997e-06, "loss": 0.27168866991996765, "step": 13407 }, { "epoch": 16.45153374233129, "grad_norm": 0.26073598861694336, "learning_rate": 4.0996368635229184e-06, "loss": 0.32190677523612976, "step": 13408 }, { "epoch": 16.452760736196318, "grad_norm": 0.24845005571842194, "learning_rate": 4.0968832349478685e-06, "loss": 0.6232713460922241, "step": 13409 }, { "epoch": 16.45398773006135, "grad_norm": 0.24012811481952667, "learning_rate": 4.0941304489178405e-06, "loss": 0.5685732364654541, "step": 13410 }, { "epoch": 16.45521472392638, "grad_norm": 0.26549461483955383, "learning_rate": 4.0913785055437915e-06, "loss": 0.6801480054855347, "step": 13411 }, { "epoch": 16.45644171779141, "grad_norm": 0.259347140789032, "learning_rate": 4.088627404936637e-06, "loss": 0.6137796640396118, "step": 13412 }, { "epoch": 16.457668711656442, "grad_norm": 0.27664729952812195, "learning_rate": 4.0858771472072735e-06, "loss": 0.4919111430644989, "step": 13413 }, { "epoch": 16.45889570552147, "grad_norm": 0.23432153463363647, "learning_rate": 4.083127732466549e-06, "loss": 0.5132556557655334, "step": 13414 }, { "epoch": 16.460122699386503, "grad_norm": 0.30956846475601196, "learning_rate": 4.08037916082529e-06, "loss": 0.6625803112983704, "step": 13415 }, { "epoch": 16.461349693251535, "grad_norm": 0.2586789131164551, "learning_rate": 4.077631432394291e-06, "loss": 0.5809575319290161, "step": 13416 }, { "epoch": 16.462576687116563, "grad_norm": 0.28565558791160583, "learning_rate": 4.0748845472842885e-06, "loss": 0.7154948711395264, "step": 13417 }, { "epoch": 16.463803680981595, "grad_norm": 0.27826476097106934, "learning_rate": 4.072138505606008e-06, "loss": 0.7317942380905151, "step": 13418 }, { "epoch": 16.465030674846627, "grad_norm": 0.2473595142364502, "learning_rate": 4.069393307470137e-06, "loss": 0.6711708307266235, "step": 13419 }, { "epoch": 16.466257668711656, "grad_norm": 0.2665158808231354, "learning_rate": 4.0666489529873286e-06, "loss": 0.6437917947769165, "step": 13420 }, { "epoch": 16.467484662576688, "grad_norm": 0.31816554069519043, "learning_rate": 4.063905442268201e-06, "loss": 0.5828184485435486, "step": 13421 }, { "epoch": 16.46871165644172, "grad_norm": 0.26459747552871704, "learning_rate": 4.061162775423327e-06, "loss": 0.43357664346694946, "step": 13422 }, { "epoch": 16.469938650306748, "grad_norm": 0.24702690541744232, "learning_rate": 4.058420952563255e-06, "loss": 0.26038432121276855, "step": 13423 }, { "epoch": 16.47116564417178, "grad_norm": 0.20874831080436707, "learning_rate": 4.055679973798515e-06, "loss": 0.37112128734588623, "step": 13424 }, { "epoch": 16.47239263803681, "grad_norm": 0.28118160367012024, "learning_rate": 4.052939839239586e-06, "loss": 0.5966765880584717, "step": 13425 }, { "epoch": 16.47361963190184, "grad_norm": 0.30587637424468994, "learning_rate": 4.050200548996905e-06, "loss": 0.5127466917037964, "step": 13426 }, { "epoch": 16.474846625766872, "grad_norm": 0.2888067960739136, "learning_rate": 4.047462103180885e-06, "loss": 0.6916700005531311, "step": 13427 }, { "epoch": 16.4760736196319, "grad_norm": 0.2779165804386139, "learning_rate": 4.044724501901912e-06, "loss": 0.5314568281173706, "step": 13428 }, { "epoch": 16.477300613496933, "grad_norm": 0.24601253867149353, "learning_rate": 4.041987745270328e-06, "loss": 0.5347415208816528, "step": 13429 }, { "epoch": 16.478527607361965, "grad_norm": 0.2849120795726776, "learning_rate": 4.039251833396448e-06, "loss": 0.575636625289917, "step": 13430 }, { "epoch": 16.479754601226993, "grad_norm": 0.25745925307273865, "learning_rate": 4.036516766390541e-06, "loss": 0.6332404613494873, "step": 13431 }, { "epoch": 16.480981595092025, "grad_norm": 0.2450753003358841, "learning_rate": 4.0337825443628516e-06, "loss": 0.36529332399368286, "step": 13432 }, { "epoch": 16.482208588957054, "grad_norm": 0.26194727420806885, "learning_rate": 4.031049167423592e-06, "loss": 0.9527350664138794, "step": 13433 }, { "epoch": 16.483435582822086, "grad_norm": 0.2784717082977295, "learning_rate": 4.028316635682936e-06, "loss": 0.5854945182800293, "step": 13434 }, { "epoch": 16.484662576687118, "grad_norm": 0.28455132246017456, "learning_rate": 4.025584949251027e-06, "loss": 0.6358904838562012, "step": 13435 }, { "epoch": 16.485889570552146, "grad_norm": 0.2543007731437683, "learning_rate": 4.022854108237961e-06, "loss": 0.7647404670715332, "step": 13436 }, { "epoch": 16.487116564417178, "grad_norm": 0.2943209409713745, "learning_rate": 4.0201241127538204e-06, "loss": 0.6368526220321655, "step": 13437 }, { "epoch": 16.48834355828221, "grad_norm": 0.29721346497535706, "learning_rate": 4.0173949629086385e-06, "loss": 0.454767107963562, "step": 13438 }, { "epoch": 16.48957055214724, "grad_norm": 0.2574632465839386, "learning_rate": 4.014666658812419e-06, "loss": 0.5252072811126709, "step": 13439 }, { "epoch": 16.49079754601227, "grad_norm": 0.24143199622631073, "learning_rate": 4.011939200575143e-06, "loss": 0.6532495021820068, "step": 13440 }, { "epoch": 16.4920245398773, "grad_norm": 0.28237384557724, "learning_rate": 4.009212588306732e-06, "loss": 0.7742435336112976, "step": 13441 }, { "epoch": 16.49325153374233, "grad_norm": 0.24412506818771362, "learning_rate": 4.006486822117095e-06, "loss": 0.5811408758163452, "step": 13442 }, { "epoch": 16.494478527607363, "grad_norm": 0.29900017380714417, "learning_rate": 4.003761902116096e-06, "loss": 0.6775732040405273, "step": 13443 }, { "epoch": 16.49570552147239, "grad_norm": 0.24633558094501495, "learning_rate": 4.001037828413573e-06, "loss": 0.5087480545043945, "step": 13444 }, { "epoch": 16.496932515337424, "grad_norm": 0.3105071187019348, "learning_rate": 3.998314601119327e-06, "loss": 0.5450930595397949, "step": 13445 }, { "epoch": 16.498159509202456, "grad_norm": 0.29072239995002747, "learning_rate": 3.995592220343117e-06, "loss": 0.4916825294494629, "step": 13446 }, { "epoch": 16.499386503067484, "grad_norm": 0.27186501026153564, "learning_rate": 3.9928706861946805e-06, "loss": 0.7289361953735352, "step": 13447 }, { "epoch": 16.500613496932516, "grad_norm": 0.2922612428665161, "learning_rate": 3.99014999878371e-06, "loss": 0.6063356399536133, "step": 13448 }, { "epoch": 16.501840490797544, "grad_norm": 0.27941787242889404, "learning_rate": 3.987430158219879e-06, "loss": 0.5558658242225647, "step": 13449 }, { "epoch": 16.503067484662576, "grad_norm": 0.2699414789676666, "learning_rate": 3.984711164612801e-06, "loss": 0.49924546480178833, "step": 13450 }, { "epoch": 16.50429447852761, "grad_norm": 0.2943490743637085, "learning_rate": 3.981993018072081e-06, "loss": 0.7341657876968384, "step": 13451 }, { "epoch": 16.505521472392637, "grad_norm": 0.24542152881622314, "learning_rate": 3.979275718707273e-06, "loss": 0.6278015375137329, "step": 13452 }, { "epoch": 16.50674846625767, "grad_norm": 0.2564992606639862, "learning_rate": 3.97655926662791e-06, "loss": 0.5953559279441833, "step": 13453 }, { "epoch": 16.5079754601227, "grad_norm": 0.23075486719608307, "learning_rate": 3.973843661943491e-06, "loss": 0.4288787543773651, "step": 13454 }, { "epoch": 16.50920245398773, "grad_norm": 0.26729458570480347, "learning_rate": 3.971128904763455e-06, "loss": 0.5012930035591125, "step": 13455 }, { "epoch": 16.51042944785276, "grad_norm": 0.2548949718475342, "learning_rate": 3.9684149951972395e-06, "loss": 0.6532402038574219, "step": 13456 }, { "epoch": 16.51165644171779, "grad_norm": 0.2783442735671997, "learning_rate": 3.965701933354232e-06, "loss": 0.5920007228851318, "step": 13457 }, { "epoch": 16.512883435582822, "grad_norm": 0.2535090148448944, "learning_rate": 3.962989719343785e-06, "loss": 0.46315932273864746, "step": 13458 }, { "epoch": 16.514110429447854, "grad_norm": 0.2677898705005646, "learning_rate": 3.960278353275232e-06, "loss": 0.5558646321296692, "step": 13459 }, { "epoch": 16.515337423312882, "grad_norm": 0.2725067138671875, "learning_rate": 3.957567835257844e-06, "loss": 0.6382761001586914, "step": 13460 }, { "epoch": 16.516564417177914, "grad_norm": 0.24165107309818268, "learning_rate": 3.954858165400882e-06, "loss": 0.5938097238540649, "step": 13461 }, { "epoch": 16.517791411042946, "grad_norm": 0.2987682521343231, "learning_rate": 3.952149343813563e-06, "loss": 0.6900492906570435, "step": 13462 }, { "epoch": 16.519018404907975, "grad_norm": 0.258372962474823, "learning_rate": 3.949441370605075e-06, "loss": 0.7019655704498291, "step": 13463 }, { "epoch": 16.520245398773007, "grad_norm": 0.26436686515808105, "learning_rate": 3.9467342458845754e-06, "loss": 0.6970542073249817, "step": 13464 }, { "epoch": 16.521472392638035, "grad_norm": 0.2789986729621887, "learning_rate": 3.944027969761163e-06, "loss": 0.6571056842803955, "step": 13465 }, { "epoch": 16.522699386503067, "grad_norm": 0.2486659288406372, "learning_rate": 3.941322542343931e-06, "loss": 0.6067806482315063, "step": 13466 }, { "epoch": 16.5239263803681, "grad_norm": 0.2658396065235138, "learning_rate": 3.938617963741928e-06, "loss": 0.6728837490081787, "step": 13467 }, { "epoch": 16.525153374233128, "grad_norm": 0.25753262639045715, "learning_rate": 3.935914234064164e-06, "loss": 0.7721705436706543, "step": 13468 }, { "epoch": 16.52638036809816, "grad_norm": 0.27865612506866455, "learning_rate": 3.933211353419619e-06, "loss": 0.7856959104537964, "step": 13469 }, { "epoch": 16.52760736196319, "grad_norm": 0.281901478767395, "learning_rate": 3.930509321917242e-06, "loss": 0.8136746883392334, "step": 13470 }, { "epoch": 16.52883435582822, "grad_norm": 0.2852582335472107, "learning_rate": 3.927808139665942e-06, "loss": 0.5114399790763855, "step": 13471 }, { "epoch": 16.530061349693252, "grad_norm": 0.2826206684112549, "learning_rate": 3.925107806774597e-06, "loss": 0.6480723023414612, "step": 13472 }, { "epoch": 16.53128834355828, "grad_norm": 0.25171583890914917, "learning_rate": 3.922408323352048e-06, "loss": 0.6222918033599854, "step": 13473 }, { "epoch": 16.532515337423312, "grad_norm": 0.28677019476890564, "learning_rate": 3.919709689507109e-06, "loss": 0.6872520446777344, "step": 13474 }, { "epoch": 16.533742331288344, "grad_norm": 0.2756633758544922, "learning_rate": 3.917011905348544e-06, "loss": 0.7666348218917847, "step": 13475 }, { "epoch": 16.534969325153373, "grad_norm": 0.29107972979545593, "learning_rate": 3.9143149709851e-06, "loss": 0.5737598538398743, "step": 13476 }, { "epoch": 16.536196319018405, "grad_norm": 0.26966923475265503, "learning_rate": 3.911618886525481e-06, "loss": 0.4941493272781372, "step": 13477 }, { "epoch": 16.537423312883437, "grad_norm": 0.23943161964416504, "learning_rate": 3.908923652078359e-06, "loss": 0.5731884241104126, "step": 13478 }, { "epoch": 16.538650306748465, "grad_norm": 0.26497018337249756, "learning_rate": 3.906229267752376e-06, "loss": 0.5329071879386902, "step": 13479 }, { "epoch": 16.539877300613497, "grad_norm": 0.28297799825668335, "learning_rate": 3.903535733656124e-06, "loss": 0.42607471346855164, "step": 13480 }, { "epoch": 16.54110429447853, "grad_norm": 0.24245452880859375, "learning_rate": 3.900843049898178e-06, "loss": 0.35684919357299805, "step": 13481 }, { "epoch": 16.542331288343558, "grad_norm": 0.3887024223804474, "learning_rate": 3.89815121658707e-06, "loss": 0.5130758285522461, "step": 13482 }, { "epoch": 16.54355828220859, "grad_norm": 0.26584768295288086, "learning_rate": 3.895460233831305e-06, "loss": 0.7554689645767212, "step": 13483 }, { "epoch": 16.54478527607362, "grad_norm": 0.2658764719963074, "learning_rate": 3.8927701017393514e-06, "loss": 0.6497974395751953, "step": 13484 }, { "epoch": 16.54601226993865, "grad_norm": 0.2626199722290039, "learning_rate": 3.890080820419628e-06, "loss": 0.4616086483001709, "step": 13485 }, { "epoch": 16.547239263803682, "grad_norm": 0.25026941299438477, "learning_rate": 3.88739238998054e-06, "loss": 0.7010747194290161, "step": 13486 }, { "epoch": 16.54846625766871, "grad_norm": 0.3023575246334076, "learning_rate": 3.8847048105304516e-06, "loss": 0.6873563528060913, "step": 13487 }, { "epoch": 16.549693251533743, "grad_norm": 0.28972235321998596, "learning_rate": 3.882018082177685e-06, "loss": 0.6968100666999817, "step": 13488 }, { "epoch": 16.550920245398775, "grad_norm": 0.24150003492832184, "learning_rate": 3.8793322050305475e-06, "loss": 0.6709027290344238, "step": 13489 }, { "epoch": 16.552147239263803, "grad_norm": 0.2373707890510559, "learning_rate": 3.8766471791972795e-06, "loss": 0.45573821663856506, "step": 13490 }, { "epoch": 16.553374233128835, "grad_norm": 0.32015687227249146, "learning_rate": 3.873963004786124e-06, "loss": 0.5636824369430542, "step": 13491 }, { "epoch": 16.554601226993864, "grad_norm": 0.24834278225898743, "learning_rate": 3.871279681905266e-06, "loss": 0.535146951675415, "step": 13492 }, { "epoch": 16.555828220858896, "grad_norm": 0.3188056945800781, "learning_rate": 3.868597210662869e-06, "loss": 0.7713323831558228, "step": 13493 }, { "epoch": 16.557055214723928, "grad_norm": 0.24394994974136353, "learning_rate": 3.865915591167043e-06, "loss": 0.5260944962501526, "step": 13494 }, { "epoch": 16.558282208588956, "grad_norm": 0.2484259158372879, "learning_rate": 3.863234823525885e-06, "loss": 0.5665057897567749, "step": 13495 }, { "epoch": 16.559509202453988, "grad_norm": 0.26728853583335876, "learning_rate": 3.860554907847447e-06, "loss": 0.6139469146728516, "step": 13496 }, { "epoch": 16.56073619631902, "grad_norm": 0.26596346497535706, "learning_rate": 3.85787584423975e-06, "loss": 0.7352849245071411, "step": 13497 }, { "epoch": 16.56196319018405, "grad_norm": 0.24787908792495728, "learning_rate": 3.855197632810781e-06, "loss": 0.4946523904800415, "step": 13498 }, { "epoch": 16.56319018404908, "grad_norm": 0.23227541148662567, "learning_rate": 3.852520273668486e-06, "loss": 0.6288349032402039, "step": 13499 }, { "epoch": 16.56441717791411, "grad_norm": 0.24459710717201233, "learning_rate": 3.849843766920783e-06, "loss": 0.5223034024238586, "step": 13500 }, { "epoch": 16.56564417177914, "grad_norm": 0.24009688198566437, "learning_rate": 3.847168112675553e-06, "loss": 0.5890909433364868, "step": 13501 }, { "epoch": 16.566871165644173, "grad_norm": 0.24377694725990295, "learning_rate": 3.844493311040651e-06, "loss": 0.6440752148628235, "step": 13502 }, { "epoch": 16.5680981595092, "grad_norm": 0.23835021257400513, "learning_rate": 3.841819362123889e-06, "loss": 0.6620833277702332, "step": 13503 }, { "epoch": 16.569325153374233, "grad_norm": 0.2648313343524933, "learning_rate": 3.839146266033039e-06, "loss": 0.5455707907676697, "step": 13504 }, { "epoch": 16.570552147239265, "grad_norm": 0.2486337125301361, "learning_rate": 3.8364740228758495e-06, "loss": 0.5205816030502319, "step": 13505 }, { "epoch": 16.571779141104294, "grad_norm": 0.21707464754581451, "learning_rate": 3.833802632760033e-06, "loss": 0.5120348930358887, "step": 13506 }, { "epoch": 16.573006134969326, "grad_norm": 0.23007063567638397, "learning_rate": 3.831132095793266e-06, "loss": 0.3249448537826538, "step": 13507 }, { "epoch": 16.574233128834354, "grad_norm": 0.28959813714027405, "learning_rate": 3.828462412083192e-06, "loss": 0.6897670030593872, "step": 13508 }, { "epoch": 16.575460122699386, "grad_norm": 0.2731840908527374, "learning_rate": 3.825793581737411e-06, "loss": 0.4984705448150635, "step": 13509 }, { "epoch": 16.57668711656442, "grad_norm": 0.28456926345825195, "learning_rate": 3.8231256048634976e-06, "loss": 0.7275274991989136, "step": 13510 }, { "epoch": 16.577914110429447, "grad_norm": 0.2992866337299347, "learning_rate": 3.820458481568995e-06, "loss": 0.703822135925293, "step": 13511 }, { "epoch": 16.57914110429448, "grad_norm": 0.28625020384788513, "learning_rate": 3.817792211961402e-06, "loss": 0.6923888325691223, "step": 13512 }, { "epoch": 16.58036809815951, "grad_norm": 0.30114835500717163, "learning_rate": 3.815126796148194e-06, "loss": 0.6845685243606567, "step": 13513 }, { "epoch": 16.58159509202454, "grad_norm": 0.2709263861179352, "learning_rate": 3.812462234236802e-06, "loss": 0.6964688301086426, "step": 13514 }, { "epoch": 16.58282208588957, "grad_norm": 0.29737672209739685, "learning_rate": 3.809798526334632e-06, "loss": 0.4190834164619446, "step": 13515 }, { "epoch": 16.5840490797546, "grad_norm": 0.2327977567911148, "learning_rate": 3.807135672549045e-06, "loss": 0.5275100469589233, "step": 13516 }, { "epoch": 16.58527607361963, "grad_norm": 0.23630207777023315, "learning_rate": 3.80447367298738e-06, "loss": 0.511720597743988, "step": 13517 }, { "epoch": 16.586503067484664, "grad_norm": 0.2726427912712097, "learning_rate": 3.8018125277569234e-06, "loss": 0.48084279894828796, "step": 13518 }, { "epoch": 16.587730061349692, "grad_norm": 0.28367891907691956, "learning_rate": 3.799152236964945e-06, "loss": 0.7814818620681763, "step": 13519 }, { "epoch": 16.588957055214724, "grad_norm": 0.3062478303909302, "learning_rate": 3.7964928007186762e-06, "loss": 0.45501959323883057, "step": 13520 }, { "epoch": 16.590184049079756, "grad_norm": 0.28473085165023804, "learning_rate": 3.7938342191253055e-06, "loss": 0.46007585525512695, "step": 13521 }, { "epoch": 16.591411042944785, "grad_norm": 0.2389981895685196, "learning_rate": 3.7911764922920007e-06, "loss": 0.4917181730270386, "step": 13522 }, { "epoch": 16.592638036809817, "grad_norm": 0.2775445282459259, "learning_rate": 3.7885196203258784e-06, "loss": 0.6160327196121216, "step": 13523 }, { "epoch": 16.593865030674845, "grad_norm": 0.27976056933403015, "learning_rate": 3.785863603334033e-06, "loss": 0.5976130962371826, "step": 13524 }, { "epoch": 16.595092024539877, "grad_norm": 0.29546377062797546, "learning_rate": 3.783208441423519e-06, "loss": 0.6344999074935913, "step": 13525 }, { "epoch": 16.59631901840491, "grad_norm": 0.2509874403476715, "learning_rate": 3.780554134701364e-06, "loss": 0.626896858215332, "step": 13526 }, { "epoch": 16.597546012269937, "grad_norm": 0.28175094723701477, "learning_rate": 3.7779006832745546e-06, "loss": 0.7967015504837036, "step": 13527 }, { "epoch": 16.59877300613497, "grad_norm": 0.27139097452163696, "learning_rate": 3.7752480872500385e-06, "loss": 0.7107314467430115, "step": 13528 }, { "epoch": 16.6, "grad_norm": 0.29114899039268494, "learning_rate": 3.772596346734733e-06, "loss": 0.6419168710708618, "step": 13529 }, { "epoch": 16.60122699386503, "grad_norm": 0.2485905885696411, "learning_rate": 3.769945461835531e-06, "loss": 0.6040633320808411, "step": 13530 }, { "epoch": 16.602453987730062, "grad_norm": 0.2371351569890976, "learning_rate": 3.7672954326592736e-06, "loss": 0.45838648080825806, "step": 13531 }, { "epoch": 16.60368098159509, "grad_norm": 0.25453057885169983, "learning_rate": 3.7646462593127874e-06, "loss": 0.5847880244255066, "step": 13532 }, { "epoch": 16.604907975460122, "grad_norm": 0.2843826711177826, "learning_rate": 3.7619979419028395e-06, "loss": 0.5673969984054565, "step": 13533 }, { "epoch": 16.606134969325154, "grad_norm": 0.21978336572647095, "learning_rate": 3.759350480536175e-06, "loss": 0.4687657356262207, "step": 13534 }, { "epoch": 16.607361963190183, "grad_norm": 0.2504138648509979, "learning_rate": 3.75670387531952e-06, "loss": 0.776801347732544, "step": 13535 }, { "epoch": 16.608588957055215, "grad_norm": 0.2822776138782501, "learning_rate": 3.75405812635955e-06, "loss": 0.6085186004638672, "step": 13536 }, { "epoch": 16.609815950920247, "grad_norm": 0.27751386165618896, "learning_rate": 3.751413233762893e-06, "loss": 0.5504783391952515, "step": 13537 }, { "epoch": 16.611042944785275, "grad_norm": 0.2616467773914337, "learning_rate": 3.748769197636168e-06, "loss": 0.5619747042655945, "step": 13538 }, { "epoch": 16.612269938650307, "grad_norm": 0.23758041858673096, "learning_rate": 3.7461260180859465e-06, "loss": 0.4576037526130676, "step": 13539 }, { "epoch": 16.61349693251534, "grad_norm": 0.2717045843601227, "learning_rate": 3.7434836952187662e-06, "loss": 0.6839281916618347, "step": 13540 }, { "epoch": 16.614723926380368, "grad_norm": 0.2779899537563324, "learning_rate": 3.7408422291411365e-06, "loss": 0.6558777093887329, "step": 13541 }, { "epoch": 16.6159509202454, "grad_norm": 0.25424543023109436, "learning_rate": 3.738201619959522e-06, "loss": 0.34556156396865845, "step": 13542 }, { "epoch": 16.617177914110428, "grad_norm": 0.2425185889005661, "learning_rate": 3.735561867780357e-06, "loss": 0.40559080243110657, "step": 13543 }, { "epoch": 16.61840490797546, "grad_norm": 0.29092735052108765, "learning_rate": 3.7329229727100455e-06, "loss": 0.7236274480819702, "step": 13544 }, { "epoch": 16.619631901840492, "grad_norm": 0.27546414732933044, "learning_rate": 3.730284934854952e-06, "loss": 0.43334996700286865, "step": 13545 }, { "epoch": 16.62085889570552, "grad_norm": 0.2896122634410858, "learning_rate": 3.727647754321417e-06, "loss": 0.6624947786331177, "step": 13546 }, { "epoch": 16.622085889570553, "grad_norm": 0.2513180375099182, "learning_rate": 3.725011431215725e-06, "loss": 0.647390604019165, "step": 13547 }, { "epoch": 16.62331288343558, "grad_norm": 0.25170135498046875, "learning_rate": 3.7223759656441424e-06, "loss": 0.5863826274871826, "step": 13548 }, { "epoch": 16.624539877300613, "grad_norm": 0.24896259605884552, "learning_rate": 3.7197413577129025e-06, "loss": 0.4133906066417694, "step": 13549 }, { "epoch": 16.625766871165645, "grad_norm": 0.2675032913684845, "learning_rate": 3.717107607528192e-06, "loss": 0.7516257762908936, "step": 13550 }, { "epoch": 16.626993865030673, "grad_norm": 0.26977258920669556, "learning_rate": 3.7144747151961784e-06, "loss": 0.5856773853302002, "step": 13551 }, { "epoch": 16.628220858895705, "grad_norm": 0.279758483171463, "learning_rate": 3.7118426808229772e-06, "loss": 0.5759555101394653, "step": 13552 }, { "epoch": 16.629447852760737, "grad_norm": 0.32101279497146606, "learning_rate": 3.709211504514681e-06, "loss": 0.7530255317687988, "step": 13553 }, { "epoch": 16.630674846625766, "grad_norm": 0.22524166107177734, "learning_rate": 3.7065811863773447e-06, "loss": 0.5123332738876343, "step": 13554 }, { "epoch": 16.631901840490798, "grad_norm": 0.2562722861766815, "learning_rate": 3.703951726516991e-06, "loss": 0.35065847635269165, "step": 13555 }, { "epoch": 16.63312883435583, "grad_norm": 0.2583390176296234, "learning_rate": 3.7013231250396072e-06, "loss": 0.7263277769088745, "step": 13556 }, { "epoch": 16.63435582822086, "grad_norm": 0.2646239697933197, "learning_rate": 3.69869538205114e-06, "loss": 0.665052056312561, "step": 13557 }, { "epoch": 16.63558282208589, "grad_norm": 0.29707950353622437, "learning_rate": 3.6960684976575094e-06, "loss": 0.6265705823898315, "step": 13558 }, { "epoch": 16.63680981595092, "grad_norm": 0.3031419515609741, "learning_rate": 3.693442471964595e-06, "loss": 0.5218082070350647, "step": 13559 }, { "epoch": 16.63803680981595, "grad_norm": 0.2634071707725525, "learning_rate": 3.6908173050782534e-06, "loss": 0.4609111547470093, "step": 13560 }, { "epoch": 16.639263803680983, "grad_norm": 0.25400203466415405, "learning_rate": 3.6881929971042867e-06, "loss": 0.4270533323287964, "step": 13561 }, { "epoch": 16.64049079754601, "grad_norm": 0.2345379739999771, "learning_rate": 3.6855695481484764e-06, "loss": 0.5307196378707886, "step": 13562 }, { "epoch": 16.641717791411043, "grad_norm": 0.25783422589302063, "learning_rate": 3.682946958316566e-06, "loss": 0.49784544110298157, "step": 13563 }, { "epoch": 16.642944785276075, "grad_norm": 0.2748885750770569, "learning_rate": 3.680325227714265e-06, "loss": 0.5985432863235474, "step": 13564 }, { "epoch": 16.644171779141104, "grad_norm": 0.2539173662662506, "learning_rate": 3.6777043564472534e-06, "loss": 0.4422360062599182, "step": 13565 }, { "epoch": 16.645398773006136, "grad_norm": 0.2689434885978699, "learning_rate": 3.675084344621163e-06, "loss": 0.7707754373550415, "step": 13566 }, { "epoch": 16.646625766871164, "grad_norm": 0.23252874612808228, "learning_rate": 3.672465192341601e-06, "loss": 0.5364407896995544, "step": 13567 }, { "epoch": 16.647852760736196, "grad_norm": 0.2507830858230591, "learning_rate": 3.669846899714138e-06, "loss": 0.5653311610221863, "step": 13568 }, { "epoch": 16.649079754601228, "grad_norm": 0.26781129837036133, "learning_rate": 3.667229466844313e-06, "loss": 0.6274277567863464, "step": 13569 }, { "epoch": 16.650306748466257, "grad_norm": 0.23064909875392914, "learning_rate": 3.6646128938376297e-06, "loss": 0.49413347244262695, "step": 13570 }, { "epoch": 16.65153374233129, "grad_norm": 0.2826974093914032, "learning_rate": 3.661997180799545e-06, "loss": 0.7142066359519958, "step": 13571 }, { "epoch": 16.65276073619632, "grad_norm": 0.3049847185611725, "learning_rate": 3.659382327835495e-06, "loss": 0.5569762587547302, "step": 13572 }, { "epoch": 16.65398773006135, "grad_norm": 0.2784525454044342, "learning_rate": 3.6567683350508807e-06, "loss": 0.5983116626739502, "step": 13573 }, { "epoch": 16.65521472392638, "grad_norm": 0.2894388735294342, "learning_rate": 3.65415520255106e-06, "loss": 0.4138932228088379, "step": 13574 }, { "epoch": 16.65644171779141, "grad_norm": 0.241652712225914, "learning_rate": 3.6515429304413624e-06, "loss": 0.632384717464447, "step": 13575 }, { "epoch": 16.65766871165644, "grad_norm": 0.27355942130088806, "learning_rate": 3.6489315188270867e-06, "loss": 0.5444806814193726, "step": 13576 }, { "epoch": 16.658895705521473, "grad_norm": 0.28422871232032776, "learning_rate": 3.6463209678134825e-06, "loss": 0.7084797620773315, "step": 13577 }, { "epoch": 16.660122699386502, "grad_norm": 0.25753363966941833, "learning_rate": 3.643711277505776e-06, "loss": 0.68907231092453, "step": 13578 }, { "epoch": 16.661349693251534, "grad_norm": 0.265800803899765, "learning_rate": 3.6411024480091536e-06, "loss": 0.4828627407550812, "step": 13579 }, { "epoch": 16.662576687116566, "grad_norm": 0.25950974225997925, "learning_rate": 3.6384944794287862e-06, "loss": 0.45177459716796875, "step": 13580 }, { "epoch": 16.663803680981594, "grad_norm": 0.2727930545806885, "learning_rate": 3.6358873718697726e-06, "loss": 0.3861740529537201, "step": 13581 }, { "epoch": 16.665030674846626, "grad_norm": 0.27205589413642883, "learning_rate": 3.6332811254372075e-06, "loss": 0.5940740704536438, "step": 13582 }, { "epoch": 16.666257668711655, "grad_norm": 0.3040759861469269, "learning_rate": 3.6306757402361417e-06, "loss": 0.5889096260070801, "step": 13583 }, { "epoch": 16.667484662576687, "grad_norm": 0.2589207589626312, "learning_rate": 3.6280712163715898e-06, "loss": 0.8433483839035034, "step": 13584 }, { "epoch": 16.66871165644172, "grad_norm": 0.26233547925949097, "learning_rate": 3.625467553948539e-06, "loss": 0.6691755056381226, "step": 13585 }, { "epoch": 16.669938650306747, "grad_norm": 0.2714892327785492, "learning_rate": 3.6228647530719195e-06, "loss": 0.6778042912483215, "step": 13586 }, { "epoch": 16.67116564417178, "grad_norm": 0.291202187538147, "learning_rate": 3.620262813846656e-06, "loss": 0.6806539297103882, "step": 13587 }, { "epoch": 16.67239263803681, "grad_norm": 0.2875162363052368, "learning_rate": 3.61766173637762e-06, "loss": 0.6928143501281738, "step": 13588 }, { "epoch": 16.67361963190184, "grad_norm": 0.22702521085739136, "learning_rate": 3.6150615207696553e-06, "loss": 0.5771082639694214, "step": 13589 }, { "epoch": 16.67484662576687, "grad_norm": 0.25797757506370544, "learning_rate": 3.6124621671275752e-06, "loss": 0.6669694185256958, "step": 13590 }, { "epoch": 16.6760736196319, "grad_norm": 0.2818126976490021, "learning_rate": 3.609863675556138e-06, "loss": 0.5346481800079346, "step": 13591 }, { "epoch": 16.677300613496932, "grad_norm": 0.2541286051273346, "learning_rate": 3.607266046160093e-06, "loss": 0.5398818254470825, "step": 13592 }, { "epoch": 16.678527607361964, "grad_norm": 0.27284878492355347, "learning_rate": 3.6046692790441372e-06, "loss": 0.6868064999580383, "step": 13593 }, { "epoch": 16.679754601226993, "grad_norm": 0.295403391122818, "learning_rate": 3.602073374312942e-06, "loss": 0.6123135685920715, "step": 13594 }, { "epoch": 16.680981595092025, "grad_norm": 0.2644230127334595, "learning_rate": 3.5994783320711463e-06, "loss": 0.6308223009109497, "step": 13595 }, { "epoch": 16.682208588957057, "grad_norm": 0.25236859917640686, "learning_rate": 3.596884152423338e-06, "loss": 0.7930248975753784, "step": 13596 }, { "epoch": 16.683435582822085, "grad_norm": 0.3059849441051483, "learning_rate": 3.5942908354740847e-06, "loss": 0.715412437915802, "step": 13597 }, { "epoch": 16.684662576687117, "grad_norm": 0.2781204879283905, "learning_rate": 3.5916983813279155e-06, "loss": 0.8444545865058899, "step": 13598 }, { "epoch": 16.68588957055215, "grad_norm": 0.2843727171421051, "learning_rate": 3.5891067900893276e-06, "loss": 0.6137547492980957, "step": 13599 }, { "epoch": 16.687116564417177, "grad_norm": 0.2808291018009186, "learning_rate": 3.5865160618627852e-06, "loss": 0.5893188714981079, "step": 13600 }, { "epoch": 16.68834355828221, "grad_norm": 0.29512178897857666, "learning_rate": 3.583926196752696e-06, "loss": 0.5986939668655396, "step": 13601 }, { "epoch": 16.689570552147238, "grad_norm": 0.2655300498008728, "learning_rate": 3.5813371948634653e-06, "loss": 0.6110125780105591, "step": 13602 }, { "epoch": 16.69079754601227, "grad_norm": 0.3062121272087097, "learning_rate": 3.5787490562994463e-06, "loss": 0.5606561899185181, "step": 13603 }, { "epoch": 16.692024539877302, "grad_norm": 0.29366791248321533, "learning_rate": 3.5761617811649634e-06, "loss": 0.6082063913345337, "step": 13604 }, { "epoch": 16.69325153374233, "grad_norm": 0.3370436131954193, "learning_rate": 3.5735753695642886e-06, "loss": 0.6973604559898376, "step": 13605 }, { "epoch": 16.694478527607362, "grad_norm": 0.25652211904525757, "learning_rate": 3.5709898216016835e-06, "loss": 0.7742854356765747, "step": 13606 }, { "epoch": 16.69570552147239, "grad_norm": 0.26020464301109314, "learning_rate": 3.5684051373813587e-06, "loss": 0.4977881908416748, "step": 13607 }, { "epoch": 16.696932515337423, "grad_norm": 0.28009840846061707, "learning_rate": 3.565821317007498e-06, "loss": 0.7324886322021484, "step": 13608 }, { "epoch": 16.698159509202455, "grad_norm": 0.2796640396118164, "learning_rate": 3.563238360584256e-06, "loss": 0.7661074995994568, "step": 13609 }, { "epoch": 16.699386503067483, "grad_norm": 0.25862374901771545, "learning_rate": 3.5606562682157285e-06, "loss": 0.6600699424743652, "step": 13610 }, { "epoch": 16.700613496932515, "grad_norm": 0.2614816129207611, "learning_rate": 3.5580750400059982e-06, "loss": 0.5088216066360474, "step": 13611 }, { "epoch": 16.701840490797547, "grad_norm": 0.28164899349212646, "learning_rate": 3.555494676059112e-06, "loss": 0.3908440172672272, "step": 13612 }, { "epoch": 16.703067484662576, "grad_norm": 0.2340773642063141, "learning_rate": 3.552915176479071e-06, "loss": 0.49042993783950806, "step": 13613 }, { "epoch": 16.704294478527608, "grad_norm": 0.282294899225235, "learning_rate": 3.5503365413698586e-06, "loss": 0.8712993860244751, "step": 13614 }, { "epoch": 16.70552147239264, "grad_norm": 0.2397647351026535, "learning_rate": 3.5477587708353967e-06, "loss": 0.60031658411026, "step": 13615 }, { "epoch": 16.706748466257668, "grad_norm": 0.2964085638523102, "learning_rate": 3.5451818649795964e-06, "loss": 0.6269068717956543, "step": 13616 }, { "epoch": 16.7079754601227, "grad_norm": 0.2923966646194458, "learning_rate": 3.5426058239063225e-06, "loss": 0.5541461110115051, "step": 13617 }, { "epoch": 16.70920245398773, "grad_norm": 0.2910810708999634, "learning_rate": 3.5400306477194083e-06, "loss": 0.7940367460250854, "step": 13618 }, { "epoch": 16.71042944785276, "grad_norm": 0.2815234065055847, "learning_rate": 3.53745633652266e-06, "loss": 0.5329755544662476, "step": 13619 }, { "epoch": 16.711656441717793, "grad_norm": 0.26581230759620667, "learning_rate": 3.53488289041983e-06, "loss": 0.6047123074531555, "step": 13620 }, { "epoch": 16.71288343558282, "grad_norm": 0.23818054795265198, "learning_rate": 3.532310309514647e-06, "loss": 0.63443523645401, "step": 13621 }, { "epoch": 16.714110429447853, "grad_norm": 0.28829506039619446, "learning_rate": 3.5297385939108086e-06, "loss": 0.7043338418006897, "step": 13622 }, { "epoch": 16.715337423312885, "grad_norm": 0.2895212471485138, "learning_rate": 3.5271677437119734e-06, "loss": 0.66511470079422, "step": 13623 }, { "epoch": 16.716564417177914, "grad_norm": 0.2840301990509033, "learning_rate": 3.524597759021764e-06, "loss": 0.4641435742378235, "step": 13624 }, { "epoch": 16.717791411042946, "grad_norm": 0.27899056673049927, "learning_rate": 3.5220286399437698e-06, "loss": 0.7292705774307251, "step": 13625 }, { "epoch": 16.719018404907974, "grad_norm": 0.30384737253189087, "learning_rate": 3.519460386581541e-06, "loss": 0.6060152649879456, "step": 13626 }, { "epoch": 16.720245398773006, "grad_norm": 0.2620384991168976, "learning_rate": 3.5168929990386013e-06, "loss": 0.5413442850112915, "step": 13627 }, { "epoch": 16.721472392638038, "grad_norm": 0.2674797773361206, "learning_rate": 3.514326477418439e-06, "loss": 0.6548683643341064, "step": 13628 }, { "epoch": 16.722699386503066, "grad_norm": 0.29867759346961975, "learning_rate": 3.5117608218244918e-06, "loss": 0.6089544892311096, "step": 13629 }, { "epoch": 16.7239263803681, "grad_norm": 0.2547454535961151, "learning_rate": 3.509196032360179e-06, "loss": 0.49666255712509155, "step": 13630 }, { "epoch": 16.72515337423313, "grad_norm": 0.30053648352622986, "learning_rate": 3.5066321091288823e-06, "loss": 0.5998561382293701, "step": 13631 }, { "epoch": 16.72638036809816, "grad_norm": 0.2684110403060913, "learning_rate": 3.5040690522339437e-06, "loss": 0.646910548210144, "step": 13632 }, { "epoch": 16.72760736196319, "grad_norm": 0.2843230962753296, "learning_rate": 3.501506861778678e-06, "loss": 0.7195920944213867, "step": 13633 }, { "epoch": 16.72883435582822, "grad_norm": 0.26792532205581665, "learning_rate": 3.4989455378663495e-06, "loss": 0.7078741192817688, "step": 13634 }, { "epoch": 16.73006134969325, "grad_norm": 0.2772303521633148, "learning_rate": 3.496385080600201e-06, "loss": 0.38101398944854736, "step": 13635 }, { "epoch": 16.731288343558283, "grad_norm": 0.2922649085521698, "learning_rate": 3.493825490083444e-06, "loss": 0.5084987878799438, "step": 13636 }, { "epoch": 16.73251533742331, "grad_norm": 0.3166411817073822, "learning_rate": 3.4912667664192405e-06, "loss": 0.47784435749053955, "step": 13637 }, { "epoch": 16.733742331288344, "grad_norm": 0.22094576060771942, "learning_rate": 3.488708909710736e-06, "loss": 0.49282175302505493, "step": 13638 }, { "epoch": 16.734969325153376, "grad_norm": 0.2632770538330078, "learning_rate": 3.4861519200610175e-06, "loss": 0.6506996154785156, "step": 13639 }, { "epoch": 16.736196319018404, "grad_norm": 0.3176959156990051, "learning_rate": 3.4835957975731553e-06, "loss": 0.4167996048927307, "step": 13640 }, { "epoch": 16.737423312883436, "grad_norm": 0.2839564383029938, "learning_rate": 3.481040542350181e-06, "loss": 0.774570107460022, "step": 13641 }, { "epoch": 16.738650306748465, "grad_norm": 0.29241037368774414, "learning_rate": 3.4784861544950863e-06, "loss": 0.5417691469192505, "step": 13642 }, { "epoch": 16.739877300613497, "grad_norm": 0.27787351608276367, "learning_rate": 3.4759326341108428e-06, "loss": 0.7086392641067505, "step": 13643 }, { "epoch": 16.74110429447853, "grad_norm": 0.24497509002685547, "learning_rate": 3.4733799813003564e-06, "loss": 0.5511797070503235, "step": 13644 }, { "epoch": 16.742331288343557, "grad_norm": 0.23653855919837952, "learning_rate": 3.470828196166523e-06, "loss": 0.5285766124725342, "step": 13645 }, { "epoch": 16.74355828220859, "grad_norm": 0.2720735967159271, "learning_rate": 3.4682772788122096e-06, "loss": 0.5938529372215271, "step": 13646 }, { "epoch": 16.74478527607362, "grad_norm": 0.23708631098270416, "learning_rate": 3.465727229340232e-06, "loss": 0.37712568044662476, "step": 13647 }, { "epoch": 16.74601226993865, "grad_norm": 0.2647550106048584, "learning_rate": 3.463178047853366e-06, "loss": 0.5898216962814331, "step": 13648 }, { "epoch": 16.74723926380368, "grad_norm": 0.26845356822013855, "learning_rate": 3.460629734454371e-06, "loss": 0.6812973022460938, "step": 13649 }, { "epoch": 16.74846625766871, "grad_norm": 0.2609066367149353, "learning_rate": 3.4580822892459563e-06, "loss": 0.5620642900466919, "step": 13650 }, { "epoch": 16.749693251533742, "grad_norm": 0.26618310809135437, "learning_rate": 3.4555357123308068e-06, "loss": 0.5522047877311707, "step": 13651 }, { "epoch": 16.750920245398774, "grad_norm": 0.25617074966430664, "learning_rate": 3.4529900038115707e-06, "loss": 0.5922518968582153, "step": 13652 }, { "epoch": 16.752147239263802, "grad_norm": 0.2535301148891449, "learning_rate": 3.4504451637908468e-06, "loss": 0.30777958035469055, "step": 13653 }, { "epoch": 16.753374233128834, "grad_norm": 0.285336971282959, "learning_rate": 3.4479011923712195e-06, "loss": 0.6380115747451782, "step": 13654 }, { "epoch": 16.754601226993866, "grad_norm": 0.244461789727211, "learning_rate": 3.4453580896552236e-06, "loss": 0.5648123025894165, "step": 13655 }, { "epoch": 16.755828220858895, "grad_norm": 0.24434275925159454, "learning_rate": 3.442815855745371e-06, "loss": 0.4557059407234192, "step": 13656 }, { "epoch": 16.757055214723927, "grad_norm": 0.23532100021839142, "learning_rate": 3.4402744907441302e-06, "loss": 0.5127414464950562, "step": 13657 }, { "epoch": 16.758282208588955, "grad_norm": 0.24098972976207733, "learning_rate": 3.4377339947539305e-06, "loss": 0.5896214246749878, "step": 13658 }, { "epoch": 16.759509202453987, "grad_norm": 0.2570820450782776, "learning_rate": 3.4351943678771786e-06, "loss": 0.7916345596313477, "step": 13659 }, { "epoch": 16.76073619631902, "grad_norm": 0.26222509145736694, "learning_rate": 3.4326556102162343e-06, "loss": 0.5742987394332886, "step": 13660 }, { "epoch": 16.761963190184048, "grad_norm": 0.27286380529403687, "learning_rate": 3.4301177218734297e-06, "loss": 0.7088263034820557, "step": 13661 }, { "epoch": 16.76319018404908, "grad_norm": 0.2731969654560089, "learning_rate": 3.4275807029510665e-06, "loss": 0.5780202746391296, "step": 13662 }, { "epoch": 16.764417177914112, "grad_norm": 0.29798537492752075, "learning_rate": 3.425044553551393e-06, "loss": 0.5835664868354797, "step": 13663 }, { "epoch": 16.76564417177914, "grad_norm": 0.2695494592189789, "learning_rate": 3.422509273776642e-06, "loss": 0.6438637375831604, "step": 13664 }, { "epoch": 16.766871165644172, "grad_norm": 0.2844570577144623, "learning_rate": 3.419974863729e-06, "loss": 0.44763287901878357, "step": 13665 }, { "epoch": 16.7680981595092, "grad_norm": 0.2954992949962616, "learning_rate": 3.417441323510623e-06, "loss": 0.5522452592849731, "step": 13666 }, { "epoch": 16.769325153374233, "grad_norm": 0.27074962854385376, "learning_rate": 3.4149086532236337e-06, "loss": 0.7427090406417847, "step": 13667 }, { "epoch": 16.770552147239265, "grad_norm": 0.28654778003692627, "learning_rate": 3.412376852970106e-06, "loss": 0.7670692205429077, "step": 13668 }, { "epoch": 16.771779141104293, "grad_norm": 0.3210298717021942, "learning_rate": 3.4098459228521036e-06, "loss": 0.8004260063171387, "step": 13669 }, { "epoch": 16.773006134969325, "grad_norm": 0.28869330883026123, "learning_rate": 3.407315862971633e-06, "loss": 0.5706861019134521, "step": 13670 }, { "epoch": 16.774233128834357, "grad_norm": 0.2717251181602478, "learning_rate": 3.404786673430682e-06, "loss": 0.6668117046356201, "step": 13671 }, { "epoch": 16.775460122699386, "grad_norm": 0.2768784463405609, "learning_rate": 3.4022583543311835e-06, "loss": 0.8200500011444092, "step": 13672 }, { "epoch": 16.776687116564418, "grad_norm": 0.24754665791988373, "learning_rate": 3.3997309057750504e-06, "loss": 0.5645126104354858, "step": 13673 }, { "epoch": 16.77791411042945, "grad_norm": 0.26835593581199646, "learning_rate": 3.397204327864162e-06, "loss": 0.6577153205871582, "step": 13674 }, { "epoch": 16.779141104294478, "grad_norm": 0.29004305601119995, "learning_rate": 3.3946786207003504e-06, "loss": 0.4410756230354309, "step": 13675 }, { "epoch": 16.78036809815951, "grad_norm": 0.2525753378868103, "learning_rate": 3.392153784385432e-06, "loss": 0.6147102117538452, "step": 13676 }, { "epoch": 16.78159509202454, "grad_norm": 0.2665146291255951, "learning_rate": 3.389629819021159e-06, "loss": 0.5674249529838562, "step": 13677 }, { "epoch": 16.78282208588957, "grad_norm": 0.29084455966949463, "learning_rate": 3.387106724709274e-06, "loss": 0.6906088590621948, "step": 13678 }, { "epoch": 16.784049079754602, "grad_norm": 0.25060901045799255, "learning_rate": 3.384584501551477e-06, "loss": 0.6115405559539795, "step": 13679 }, { "epoch": 16.78527607361963, "grad_norm": 0.29624122381210327, "learning_rate": 3.3820631496494255e-06, "loss": 0.46211329102516174, "step": 13680 }, { "epoch": 16.786503067484663, "grad_norm": 0.2781578600406647, "learning_rate": 3.3795426691047573e-06, "loss": 0.5768945217132568, "step": 13681 }, { "epoch": 16.787730061349695, "grad_norm": 0.2531939148902893, "learning_rate": 3.377023060019063e-06, "loss": 0.7268447875976562, "step": 13682 }, { "epoch": 16.788957055214723, "grad_norm": 0.26953595876693726, "learning_rate": 3.3745043224938933e-06, "loss": 0.5996267795562744, "step": 13683 }, { "epoch": 16.790184049079755, "grad_norm": 0.2821621000766754, "learning_rate": 3.371986456630777e-06, "loss": 0.5313427448272705, "step": 13684 }, { "epoch": 16.791411042944784, "grad_norm": 0.27373284101486206, "learning_rate": 3.3694694625311994e-06, "loss": 0.42417284846305847, "step": 13685 }, { "epoch": 16.792638036809816, "grad_norm": 0.28170543909072876, "learning_rate": 3.366953340296619e-06, "loss": 0.579779863357544, "step": 13686 }, { "epoch": 16.793865030674848, "grad_norm": 0.24232280254364014, "learning_rate": 3.3644380900284517e-06, "loss": 0.5528091192245483, "step": 13687 }, { "epoch": 16.795092024539876, "grad_norm": 0.3064959943294525, "learning_rate": 3.3619237118280773e-06, "loss": 0.6563563346862793, "step": 13688 }, { "epoch": 16.79631901840491, "grad_norm": 0.2818920314311981, "learning_rate": 3.3594102057968425e-06, "loss": 0.6639820337295532, "step": 13689 }, { "epoch": 16.79754601226994, "grad_norm": 0.393480122089386, "learning_rate": 3.3568975720360553e-06, "loss": 0.556190013885498, "step": 13690 }, { "epoch": 16.79877300613497, "grad_norm": 0.2707825303077698, "learning_rate": 3.3543858106470095e-06, "loss": 0.4234004616737366, "step": 13691 }, { "epoch": 16.8, "grad_norm": 0.27184173464775085, "learning_rate": 3.3518749217309327e-06, "loss": 0.4792238473892212, "step": 13692 }, { "epoch": 16.80122699386503, "grad_norm": 0.2704128623008728, "learning_rate": 3.3493649053890326e-06, "loss": 0.6733849048614502, "step": 13693 }, { "epoch": 16.80245398773006, "grad_norm": 0.27582141757011414, "learning_rate": 3.3468557617224865e-06, "loss": 0.4464452862739563, "step": 13694 }, { "epoch": 16.803680981595093, "grad_norm": 0.26427775621414185, "learning_rate": 3.344347490832428e-06, "loss": 0.6094650030136108, "step": 13695 }, { "epoch": 16.80490797546012, "grad_norm": 0.24132053554058075, "learning_rate": 3.341840092819962e-06, "loss": 0.6693807244300842, "step": 13696 }, { "epoch": 16.806134969325154, "grad_norm": 0.26663413643836975, "learning_rate": 3.339333567786146e-06, "loss": 0.6125706434249878, "step": 13697 }, { "epoch": 16.807361963190186, "grad_norm": 0.2451070249080658, "learning_rate": 3.3368279158320164e-06, "loss": 0.595022439956665, "step": 13698 }, { "epoch": 16.808588957055214, "grad_norm": 0.24644017219543457, "learning_rate": 3.3343231370585676e-06, "loss": 0.5038022398948669, "step": 13699 }, { "epoch": 16.809815950920246, "grad_norm": 0.28383350372314453, "learning_rate": 3.3318192315667603e-06, "loss": 0.6203083992004395, "step": 13700 }, { "epoch": 16.811042944785274, "grad_norm": 0.28188708424568176, "learning_rate": 3.3293161994575245e-06, "loss": 0.44332969188690186, "step": 13701 }, { "epoch": 16.812269938650306, "grad_norm": 0.30296874046325684, "learning_rate": 3.3268140408317438e-06, "loss": 0.6986279487609863, "step": 13702 }, { "epoch": 16.81349693251534, "grad_norm": 0.2620087265968323, "learning_rate": 3.3243127557902736e-06, "loss": 0.43799012899398804, "step": 13703 }, { "epoch": 16.814723926380367, "grad_norm": 0.2459009736776352, "learning_rate": 3.3218123444339333e-06, "loss": 0.49807852506637573, "step": 13704 }, { "epoch": 16.8159509202454, "grad_norm": 0.2876974940299988, "learning_rate": 3.3193128068635113e-06, "loss": 0.6790008544921875, "step": 13705 }, { "epoch": 16.81717791411043, "grad_norm": 0.3014315366744995, "learning_rate": 3.316814143179761e-06, "loss": 0.6624129414558411, "step": 13706 }, { "epoch": 16.81840490797546, "grad_norm": 0.26012659072875977, "learning_rate": 3.314316353483385e-06, "loss": 0.5877549648284912, "step": 13707 }, { "epoch": 16.81963190184049, "grad_norm": 0.29373836517333984, "learning_rate": 3.3118194378750633e-06, "loss": 0.4922751188278198, "step": 13708 }, { "epoch": 16.82085889570552, "grad_norm": 0.2501281201839447, "learning_rate": 3.3093233964554466e-06, "loss": 0.6812133193016052, "step": 13709 }, { "epoch": 16.822085889570552, "grad_norm": 0.2748108506202698, "learning_rate": 3.30682822932514e-06, "loss": 0.5887813568115234, "step": 13710 }, { "epoch": 16.823312883435584, "grad_norm": 0.2874712347984314, "learning_rate": 3.304333936584722e-06, "loss": 0.6559007167816162, "step": 13711 }, { "epoch": 16.824539877300612, "grad_norm": 0.268670916557312, "learning_rate": 3.3018405183347145e-06, "loss": 0.5140285491943359, "step": 13712 }, { "epoch": 16.825766871165644, "grad_norm": 0.2862181067466736, "learning_rate": 3.2993479746756346e-06, "loss": 0.6919898986816406, "step": 13713 }, { "epoch": 16.826993865030676, "grad_norm": 0.302206426858902, "learning_rate": 3.296856305707946e-06, "loss": 0.6271101236343384, "step": 13714 }, { "epoch": 16.828220858895705, "grad_norm": 0.2705046534538269, "learning_rate": 3.294365511532088e-06, "loss": 0.648301362991333, "step": 13715 }, { "epoch": 16.829447852760737, "grad_norm": 0.28946536779403687, "learning_rate": 3.2918755922484417e-06, "loss": 0.6622616648674011, "step": 13716 }, { "epoch": 16.830674846625765, "grad_norm": 0.2647936940193176, "learning_rate": 3.289386547957379e-06, "loss": 0.8046964406967163, "step": 13717 }, { "epoch": 16.831901840490797, "grad_norm": 0.23822569847106934, "learning_rate": 3.2868983787592232e-06, "loss": 0.427554190158844, "step": 13718 }, { "epoch": 16.83312883435583, "grad_norm": 0.3229854702949524, "learning_rate": 3.2844110847542657e-06, "loss": 0.4921659231185913, "step": 13719 }, { "epoch": 16.834355828220858, "grad_norm": 0.27087000012397766, "learning_rate": 3.2819246660427656e-06, "loss": 0.4036421775817871, "step": 13720 }, { "epoch": 16.83558282208589, "grad_norm": 0.2532016336917877, "learning_rate": 3.2794391227249392e-06, "loss": 0.6492469310760498, "step": 13721 }, { "epoch": 16.83680981595092, "grad_norm": 0.2927442193031311, "learning_rate": 3.276954454900971e-06, "loss": 0.5853734612464905, "step": 13722 }, { "epoch": 16.83803680981595, "grad_norm": 0.26540419459342957, "learning_rate": 3.2744706626710113e-06, "loss": 0.6905418634414673, "step": 13723 }, { "epoch": 16.839263803680982, "grad_norm": 0.3028125464916229, "learning_rate": 3.271987746135177e-06, "loss": 0.6062204837799072, "step": 13724 }, { "epoch": 16.84049079754601, "grad_norm": 0.24234539270401, "learning_rate": 3.2695057053935523e-06, "loss": 0.5378347039222717, "step": 13725 }, { "epoch": 16.841717791411043, "grad_norm": 0.25143617391586304, "learning_rate": 3.267024540546171e-06, "loss": 0.49239054322242737, "step": 13726 }, { "epoch": 16.842944785276075, "grad_norm": 0.29663941264152527, "learning_rate": 3.264544251693044e-06, "loss": 0.598753035068512, "step": 13727 }, { "epoch": 16.844171779141103, "grad_norm": 0.2550869882106781, "learning_rate": 3.262064838934148e-06, "loss": 0.4962111711502075, "step": 13728 }, { "epoch": 16.845398773006135, "grad_norm": 0.23554383218288422, "learning_rate": 3.2595863023694188e-06, "loss": 0.5428010821342468, "step": 13729 }, { "epoch": 16.846625766871167, "grad_norm": 0.29898083209991455, "learning_rate": 3.257108642098766e-06, "loss": 0.7046200037002563, "step": 13730 }, { "epoch": 16.847852760736195, "grad_norm": 0.27915793657302856, "learning_rate": 3.2546318582220458e-06, "loss": 0.57014399766922, "step": 13731 }, { "epoch": 16.849079754601227, "grad_norm": 0.25677624344825745, "learning_rate": 3.2521559508390946e-06, "loss": 0.7771143317222595, "step": 13732 }, { "epoch": 16.85030674846626, "grad_norm": 0.2377089560031891, "learning_rate": 3.249680920049711e-06, "loss": 0.42101335525512695, "step": 13733 }, { "epoch": 16.851533742331288, "grad_norm": 0.283773809671402, "learning_rate": 3.247206765953656e-06, "loss": 0.5106891393661499, "step": 13734 }, { "epoch": 16.85276073619632, "grad_norm": 0.3175644874572754, "learning_rate": 3.244733488650656e-06, "loss": 0.6594058871269226, "step": 13735 }, { "epoch": 16.85398773006135, "grad_norm": 0.22996892035007477, "learning_rate": 3.2422610882404004e-06, "loss": 0.6034964919090271, "step": 13736 }, { "epoch": 16.85521472392638, "grad_norm": 0.2831455171108246, "learning_rate": 3.239789564822546e-06, "loss": 0.6707595586776733, "step": 13737 }, { "epoch": 16.856441717791412, "grad_norm": 0.2565079629421234, "learning_rate": 3.2373189184967096e-06, "loss": 0.5312426090240479, "step": 13738 }, { "epoch": 16.85766871165644, "grad_norm": 0.25172021985054016, "learning_rate": 3.234849149362487e-06, "loss": 0.48015719652175903, "step": 13739 }, { "epoch": 16.858895705521473, "grad_norm": 0.25895237922668457, "learning_rate": 3.232380257519413e-06, "loss": 0.608283519744873, "step": 13740 }, { "epoch": 16.860122699386505, "grad_norm": 0.24579167366027832, "learning_rate": 3.229912243067007e-06, "loss": 0.6790754795074463, "step": 13741 }, { "epoch": 16.861349693251533, "grad_norm": 0.2730740010738373, "learning_rate": 3.2274451061047485e-06, "loss": 0.5359553694725037, "step": 13742 }, { "epoch": 16.862576687116565, "grad_norm": 0.2665250301361084, "learning_rate": 3.2249788467320797e-06, "loss": 0.6501333117485046, "step": 13743 }, { "epoch": 16.863803680981594, "grad_norm": 0.25329720973968506, "learning_rate": 3.2225134650484157e-06, "loss": 0.5518468022346497, "step": 13744 }, { "epoch": 16.865030674846626, "grad_norm": 0.25856080651283264, "learning_rate": 3.2200489611531157e-06, "loss": 0.7172317504882812, "step": 13745 }, { "epoch": 16.866257668711658, "grad_norm": 0.26289674639701843, "learning_rate": 3.217585335145526e-06, "loss": 0.5099642872810364, "step": 13746 }, { "epoch": 16.867484662576686, "grad_norm": 0.3120076060295105, "learning_rate": 3.215122587124944e-06, "loss": 0.510208785533905, "step": 13747 }, { "epoch": 16.868711656441718, "grad_norm": 0.3081243932247162, "learning_rate": 3.212660717190638e-06, "loss": 0.7659071683883667, "step": 13748 }, { "epoch": 16.86993865030675, "grad_norm": 0.2681199312210083, "learning_rate": 3.2101997254418454e-06, "loss": 0.5128135085105896, "step": 13749 }, { "epoch": 16.87116564417178, "grad_norm": 0.26554596424102783, "learning_rate": 3.2077396119777505e-06, "loss": 0.4913082420825958, "step": 13750 }, { "epoch": 16.87239263803681, "grad_norm": 0.2607540190219879, "learning_rate": 3.2052803768975182e-06, "loss": 0.5878385901451111, "step": 13751 }, { "epoch": 16.87361963190184, "grad_norm": 0.2830754220485687, "learning_rate": 3.2028220203002722e-06, "loss": 0.43259763717651367, "step": 13752 }, { "epoch": 16.87484662576687, "grad_norm": 0.25514790415763855, "learning_rate": 3.200364542285106e-06, "loss": 0.6816558837890625, "step": 13753 }, { "epoch": 16.876073619631903, "grad_norm": 0.2463422268629074, "learning_rate": 3.197907942951073e-06, "loss": 0.5254182815551758, "step": 13754 }, { "epoch": 16.87730061349693, "grad_norm": 0.23517988622188568, "learning_rate": 3.1954522223971855e-06, "loss": 0.6355121731758118, "step": 13755 }, { "epoch": 16.878527607361963, "grad_norm": 0.2737375497817993, "learning_rate": 3.1929973807224234e-06, "loss": 0.6065630316734314, "step": 13756 }, { "epoch": 16.879754601226995, "grad_norm": 0.28256160020828247, "learning_rate": 3.190543418025749e-06, "loss": 0.679277241230011, "step": 13757 }, { "epoch": 16.880981595092024, "grad_norm": 0.24832703173160553, "learning_rate": 3.1880903344060688e-06, "loss": 0.5604299902915955, "step": 13758 }, { "epoch": 16.882208588957056, "grad_norm": 0.3291320204734802, "learning_rate": 3.1856381299622567e-06, "loss": 0.7053631544113159, "step": 13759 }, { "epoch": 16.883435582822084, "grad_norm": 0.270673543214798, "learning_rate": 3.1831868047931505e-06, "loss": 0.4442753195762634, "step": 13760 }, { "epoch": 16.884662576687116, "grad_norm": 0.3692893087863922, "learning_rate": 3.1807363589975633e-06, "loss": 0.6387861371040344, "step": 13761 }, { "epoch": 16.88588957055215, "grad_norm": 0.2620218098163605, "learning_rate": 3.178286792674265e-06, "loss": 0.56974858045578, "step": 13762 }, { "epoch": 16.887116564417177, "grad_norm": 0.2863369286060333, "learning_rate": 3.175838105921991e-06, "loss": 0.6182793974876404, "step": 13763 }, { "epoch": 16.88834355828221, "grad_norm": 0.29439762234687805, "learning_rate": 3.1733902988394348e-06, "loss": 0.6389535665512085, "step": 13764 }, { "epoch": 16.88957055214724, "grad_norm": 0.3007885217666626, "learning_rate": 3.170943371525262e-06, "loss": 0.6199378371238708, "step": 13765 }, { "epoch": 16.89079754601227, "grad_norm": 0.2962051331996918, "learning_rate": 3.168497324078104e-06, "loss": 0.6735115051269531, "step": 13766 }, { "epoch": 16.8920245398773, "grad_norm": 0.27031639218330383, "learning_rate": 3.166052156596555e-06, "loss": 0.801520824432373, "step": 13767 }, { "epoch": 16.89325153374233, "grad_norm": 0.27056553959846497, "learning_rate": 3.1636078691791747e-06, "loss": 0.649016261100769, "step": 13768 }, { "epoch": 16.89447852760736, "grad_norm": 0.2632260024547577, "learning_rate": 3.161164461924479e-06, "loss": 0.6305214166641235, "step": 13769 }, { "epoch": 16.895705521472394, "grad_norm": 0.2361409217119217, "learning_rate": 3.1587219349309554e-06, "loss": 0.42255139350891113, "step": 13770 }, { "epoch": 16.896932515337422, "grad_norm": 0.2722070813179016, "learning_rate": 3.156280288297059e-06, "loss": 0.4780462384223938, "step": 13771 }, { "epoch": 16.898159509202454, "grad_norm": 0.2594931721687317, "learning_rate": 3.1538395221212017e-06, "loss": 0.6175495386123657, "step": 13772 }, { "epoch": 16.899386503067486, "grad_norm": 0.27454596757888794, "learning_rate": 3.151399636501773e-06, "loss": 0.6337130069732666, "step": 13773 }, { "epoch": 16.900613496932515, "grad_norm": 0.28184860944747925, "learning_rate": 3.148960631537107e-06, "loss": 0.8191279172897339, "step": 13774 }, { "epoch": 16.901840490797547, "grad_norm": 0.24346362054347992, "learning_rate": 3.1465225073255138e-06, "loss": 0.5612567663192749, "step": 13775 }, { "epoch": 16.903067484662575, "grad_norm": 0.3001158535480499, "learning_rate": 3.1440852639652712e-06, "loss": 0.5455963611602783, "step": 13776 }, { "epoch": 16.904294478527607, "grad_norm": 0.2513524889945984, "learning_rate": 3.141648901554617e-06, "loss": 0.6242046356201172, "step": 13777 }, { "epoch": 16.90552147239264, "grad_norm": 0.25557634234428406, "learning_rate": 3.139213420191753e-06, "loss": 0.5038963556289673, "step": 13778 }, { "epoch": 16.906748466257667, "grad_norm": 0.2732630670070648, "learning_rate": 3.136778819974848e-06, "loss": 0.45384299755096436, "step": 13779 }, { "epoch": 16.9079754601227, "grad_norm": 0.2748847007751465, "learning_rate": 3.134345101002034e-06, "loss": 0.718559205532074, "step": 13780 }, { "epoch": 16.90920245398773, "grad_norm": 0.2484334409236908, "learning_rate": 3.1319122633714033e-06, "loss": 0.6282087564468384, "step": 13781 }, { "epoch": 16.91042944785276, "grad_norm": 0.2783946990966797, "learning_rate": 3.1294803071810208e-06, "loss": 0.6779732704162598, "step": 13782 }, { "epoch": 16.911656441717792, "grad_norm": 0.23616373538970947, "learning_rate": 3.1270492325289168e-06, "loss": 0.18946753442287445, "step": 13783 }, { "epoch": 16.91288343558282, "grad_norm": 0.2642236053943634, "learning_rate": 3.124619039513069e-06, "loss": 0.5851898789405823, "step": 13784 }, { "epoch": 16.914110429447852, "grad_norm": 0.2812683880329132, "learning_rate": 3.1221897282314365e-06, "loss": 0.5933594107627869, "step": 13785 }, { "epoch": 16.915337423312884, "grad_norm": 0.2761247158050537, "learning_rate": 3.119761298781937e-06, "loss": 0.5728610754013062, "step": 13786 }, { "epoch": 16.916564417177913, "grad_norm": 0.29837897419929504, "learning_rate": 3.1173337512624577e-06, "loss": 0.5507336854934692, "step": 13787 }, { "epoch": 16.917791411042945, "grad_norm": 0.2799181044101715, "learning_rate": 3.1149070857708464e-06, "loss": 0.8514045476913452, "step": 13788 }, { "epoch": 16.919018404907977, "grad_norm": 0.23354382812976837, "learning_rate": 3.1124813024049097e-06, "loss": 0.3591523766517639, "step": 13789 }, { "epoch": 16.920245398773005, "grad_norm": 0.2540367841720581, "learning_rate": 3.1100564012624263e-06, "loss": 0.6716083288192749, "step": 13790 }, { "epoch": 16.921472392638037, "grad_norm": 0.3188624978065491, "learning_rate": 3.1076323824411365e-06, "loss": 0.4784737527370453, "step": 13791 }, { "epoch": 16.92269938650307, "grad_norm": 0.2832435369491577, "learning_rate": 3.1052092460387456e-06, "loss": 0.6434950828552246, "step": 13792 }, { "epoch": 16.923926380368098, "grad_norm": 0.26883408427238464, "learning_rate": 3.1027869921529284e-06, "loss": 0.6954885721206665, "step": 13793 }, { "epoch": 16.92515337423313, "grad_norm": 0.2820315659046173, "learning_rate": 3.1003656208813126e-06, "loss": 0.6852793097496033, "step": 13794 }, { "epoch": 16.926380368098158, "grad_norm": 0.24520422518253326, "learning_rate": 3.0979451323214975e-06, "loss": 0.6102402210235596, "step": 13795 }, { "epoch": 16.92760736196319, "grad_norm": 0.2705557644367218, "learning_rate": 3.095525526571047e-06, "loss": 0.6843574047088623, "step": 13796 }, { "epoch": 16.928834355828222, "grad_norm": 0.2503259479999542, "learning_rate": 3.0931068037274885e-06, "loss": 0.4776045083999634, "step": 13797 }, { "epoch": 16.93006134969325, "grad_norm": 0.303416907787323, "learning_rate": 3.0906889638883218e-06, "loss": 0.570721447467804, "step": 13798 }, { "epoch": 16.931288343558283, "grad_norm": 0.2753414511680603, "learning_rate": 3.0882720071509886e-06, "loss": 0.5584607720375061, "step": 13799 }, { "epoch": 16.93251533742331, "grad_norm": 0.3172058165073395, "learning_rate": 3.085855933612916e-06, "loss": 0.6621177196502686, "step": 13800 }, { "epoch": 16.933742331288343, "grad_norm": 0.24790215492248535, "learning_rate": 3.083440743371485e-06, "loss": 0.5959197878837585, "step": 13801 }, { "epoch": 16.934969325153375, "grad_norm": 0.27533939480781555, "learning_rate": 3.0810264365240622e-06, "loss": 0.5889092683792114, "step": 13802 }, { "epoch": 16.936196319018403, "grad_norm": 0.2512284815311432, "learning_rate": 3.0786130131679414e-06, "loss": 0.6186167597770691, "step": 13803 }, { "epoch": 16.937423312883435, "grad_norm": 0.2736882269382477, "learning_rate": 3.076200473400409e-06, "loss": 0.34428709745407104, "step": 13804 }, { "epoch": 16.938650306748468, "grad_norm": 0.2531657814979553, "learning_rate": 3.073788817318707e-06, "loss": 0.41363275051116943, "step": 13805 }, { "epoch": 16.939877300613496, "grad_norm": 0.28513845801353455, "learning_rate": 3.071378045020043e-06, "loss": 0.5021578669548035, "step": 13806 }, { "epoch": 16.941104294478528, "grad_norm": 0.2523878812789917, "learning_rate": 3.0689681566015898e-06, "loss": 0.7280057668685913, "step": 13807 }, { "epoch": 16.94233128834356, "grad_norm": 0.3096081614494324, "learning_rate": 3.066559152160478e-06, "loss": 0.590427041053772, "step": 13808 }, { "epoch": 16.94355828220859, "grad_norm": 0.26260095834732056, "learning_rate": 3.0641510317938106e-06, "loss": 0.4096079170703888, "step": 13809 }, { "epoch": 16.94478527607362, "grad_norm": 0.2792210876941681, "learning_rate": 3.061743795598651e-06, "loss": 0.7827041149139404, "step": 13810 }, { "epoch": 16.94601226993865, "grad_norm": 0.24348987638950348, "learning_rate": 3.0593374436720305e-06, "loss": 0.6624915599822998, "step": 13811 }, { "epoch": 16.94723926380368, "grad_norm": 0.26864591240882874, "learning_rate": 3.0569319761109468e-06, "loss": 0.5436919927597046, "step": 13812 }, { "epoch": 16.948466257668713, "grad_norm": 0.28100740909576416, "learning_rate": 3.0545273930123462e-06, "loss": 0.6822492480278015, "step": 13813 }, { "epoch": 16.94969325153374, "grad_norm": 0.3152849078178406, "learning_rate": 3.0521236944731553e-06, "loss": 0.6227823495864868, "step": 13814 }, { "epoch": 16.950920245398773, "grad_norm": 0.2796263098716736, "learning_rate": 3.0497208805902593e-06, "loss": 0.6534982323646545, "step": 13815 }, { "epoch": 16.952147239263805, "grad_norm": 0.26652318239212036, "learning_rate": 3.047318951460512e-06, "loss": 0.41542816162109375, "step": 13816 }, { "epoch": 16.953374233128834, "grad_norm": 0.2650436460971832, "learning_rate": 3.0449179071807303e-06, "loss": 0.5847918391227722, "step": 13817 }, { "epoch": 16.954601226993866, "grad_norm": 0.272148996591568, "learning_rate": 3.0425177478476866e-06, "loss": 0.5130231380462646, "step": 13818 }, { "epoch": 16.955828220858894, "grad_norm": 0.2490491420030594, "learning_rate": 3.040118473558129e-06, "loss": 0.6397589445114136, "step": 13819 }, { "epoch": 16.957055214723926, "grad_norm": 0.256782591342926, "learning_rate": 3.0377200844087626e-06, "loss": 0.649538516998291, "step": 13820 }, { "epoch": 16.958282208588958, "grad_norm": 0.24322226643562317, "learning_rate": 3.035322580496261e-06, "loss": 0.37438148260116577, "step": 13821 }, { "epoch": 16.959509202453987, "grad_norm": 0.28552621603012085, "learning_rate": 3.032925961917266e-06, "loss": 0.3265801668167114, "step": 13822 }, { "epoch": 16.96073619631902, "grad_norm": 0.2790553569793701, "learning_rate": 3.0305302287683635e-06, "loss": 0.7768080830574036, "step": 13823 }, { "epoch": 16.96196319018405, "grad_norm": 0.2725055515766144, "learning_rate": 3.028135381146133e-06, "loss": 0.7175847291946411, "step": 13824 }, { "epoch": 16.96319018404908, "grad_norm": 0.24634051322937012, "learning_rate": 3.0257414191471018e-06, "loss": 0.5142165422439575, "step": 13825 }, { "epoch": 16.96441717791411, "grad_norm": 0.28369733691215515, "learning_rate": 3.023348342867763e-06, "loss": 0.6165050268173218, "step": 13826 }, { "epoch": 16.96564417177914, "grad_norm": 0.2949495315551758, "learning_rate": 3.0209561524045693e-06, "loss": 0.6255847215652466, "step": 13827 }, { "epoch": 16.96687116564417, "grad_norm": 0.31111204624176025, "learning_rate": 3.0185648478539497e-06, "loss": 0.5821938514709473, "step": 13828 }, { "epoch": 16.968098159509204, "grad_norm": 0.2747596800327301, "learning_rate": 3.0161744293122853e-06, "loss": 0.5773163437843323, "step": 13829 }, { "epoch": 16.969325153374232, "grad_norm": 0.29463592171669006, "learning_rate": 3.013784896875929e-06, "loss": 0.7086734175682068, "step": 13830 }, { "epoch": 16.970552147239264, "grad_norm": 0.2851579189300537, "learning_rate": 3.0113962506412046e-06, "loss": 0.5756483674049377, "step": 13831 }, { "epoch": 16.971779141104296, "grad_norm": 0.27384284138679504, "learning_rate": 3.009008490704379e-06, "loss": 0.5725868940353394, "step": 13832 }, { "epoch": 16.973006134969324, "grad_norm": 0.25746992230415344, "learning_rate": 3.006621617161698e-06, "loss": 0.5592246651649475, "step": 13833 }, { "epoch": 16.974233128834356, "grad_norm": 0.24482598900794983, "learning_rate": 3.0042356301093755e-06, "loss": 0.5050148367881775, "step": 13834 }, { "epoch": 16.975460122699385, "grad_norm": 0.2651931941509247, "learning_rate": 3.0018505296435795e-06, "loss": 0.5260801315307617, "step": 13835 }, { "epoch": 16.976687116564417, "grad_norm": 0.25520452857017517, "learning_rate": 2.9994663158604526e-06, "loss": 0.573399543762207, "step": 13836 }, { "epoch": 16.97791411042945, "grad_norm": 0.2568739652633667, "learning_rate": 2.997082988856087e-06, "loss": 0.5170280933380127, "step": 13837 }, { "epoch": 16.979141104294477, "grad_norm": 0.2573480010032654, "learning_rate": 2.9947005487265507e-06, "loss": 0.5422335267066956, "step": 13838 }, { "epoch": 16.98036809815951, "grad_norm": 0.24912434816360474, "learning_rate": 2.9923189955678747e-06, "loss": 0.49587708711624146, "step": 13839 }, { "epoch": 16.98159509202454, "grad_norm": 0.2594250738620758, "learning_rate": 2.98993832947605e-06, "loss": 0.599999189376831, "step": 13840 }, { "epoch": 16.98282208588957, "grad_norm": 0.27185922861099243, "learning_rate": 2.9875585505470454e-06, "loss": 0.5223504304885864, "step": 13841 }, { "epoch": 16.9840490797546, "grad_norm": 0.25060898065567017, "learning_rate": 2.9851796588767663e-06, "loss": 0.560420036315918, "step": 13842 }, { "epoch": 16.98527607361963, "grad_norm": 0.30097413063049316, "learning_rate": 2.982801654561107e-06, "loss": 0.6724710464477539, "step": 13843 }, { "epoch": 16.986503067484662, "grad_norm": 0.2541816234588623, "learning_rate": 2.9804245376959196e-06, "loss": 0.535612940788269, "step": 13844 }, { "epoch": 16.987730061349694, "grad_norm": 0.2822834849357605, "learning_rate": 2.978048308377018e-06, "loss": 0.8330264091491699, "step": 13845 }, { "epoch": 16.988957055214723, "grad_norm": 0.30416765809059143, "learning_rate": 2.97567296670018e-06, "loss": 0.6940997838973999, "step": 13846 }, { "epoch": 16.990184049079755, "grad_norm": 0.2611723840236664, "learning_rate": 2.973298512761147e-06, "loss": 0.6274027228355408, "step": 13847 }, { "epoch": 16.991411042944787, "grad_norm": 0.2533344328403473, "learning_rate": 2.9709249466556317e-06, "loss": 0.5577548146247864, "step": 13848 }, { "epoch": 16.992638036809815, "grad_norm": 0.27019330859184265, "learning_rate": 2.9685522684792994e-06, "loss": 0.31882721185684204, "step": 13849 }, { "epoch": 16.993865030674847, "grad_norm": 0.2527543604373932, "learning_rate": 2.9661804783277986e-06, "loss": 0.6008340716362, "step": 13850 }, { "epoch": 16.99509202453988, "grad_norm": 0.26821568608283997, "learning_rate": 2.963809576296714e-06, "loss": 0.7033910751342773, "step": 13851 }, { "epoch": 16.996319018404908, "grad_norm": 0.28280729055404663, "learning_rate": 2.961439562481613e-06, "loss": 0.4563539922237396, "step": 13852 }, { "epoch": 16.99754601226994, "grad_norm": 0.26525914669036865, "learning_rate": 2.9590704369780313e-06, "loss": 0.6936284303665161, "step": 13853 }, { "epoch": 16.998773006134968, "grad_norm": 0.2654562294483185, "learning_rate": 2.956702199881456e-06, "loss": 0.5382267832756042, "step": 13854 }, { "epoch": 17.0, "grad_norm": 0.3283005654811859, "learning_rate": 2.954334851287352e-06, "loss": 0.478439062833786, "step": 13855 }, { "epoch": 17.001226993865032, "grad_norm": 0.25835704803466797, "learning_rate": 2.9519683912911266e-06, "loss": 0.5971211194992065, "step": 13856 }, { "epoch": 17.00245398773006, "grad_norm": 0.2828556001186371, "learning_rate": 2.949602819988173e-06, "loss": 0.5935590267181396, "step": 13857 }, { "epoch": 17.003680981595092, "grad_norm": 0.330005943775177, "learning_rate": 2.94723813747384e-06, "loss": 0.5890476703643799, "step": 13858 }, { "epoch": 17.004907975460124, "grad_norm": 0.255816787481308, "learning_rate": 2.944874343843443e-06, "loss": 0.6468571424484253, "step": 13859 }, { "epoch": 17.006134969325153, "grad_norm": 0.26757174730300903, "learning_rate": 2.942511439192261e-06, "loss": 0.7173235416412354, "step": 13860 }, { "epoch": 17.007361963190185, "grad_norm": 0.272662490606308, "learning_rate": 2.940149423615529e-06, "loss": 0.5553626418113708, "step": 13861 }, { "epoch": 17.008588957055213, "grad_norm": 0.28645092248916626, "learning_rate": 2.9377882972084576e-06, "loss": 0.6260792016983032, "step": 13862 }, { "epoch": 17.009815950920245, "grad_norm": 0.25429534912109375, "learning_rate": 2.9354280600662172e-06, "loss": 0.6057968139648438, "step": 13863 }, { "epoch": 17.011042944785277, "grad_norm": 0.254809707403183, "learning_rate": 2.9330687122839372e-06, "loss": 0.5453531742095947, "step": 13864 }, { "epoch": 17.012269938650306, "grad_norm": 0.282833069562912, "learning_rate": 2.930710253956731e-06, "loss": 0.6575846672058105, "step": 13865 }, { "epoch": 17.013496932515338, "grad_norm": 0.25452202558517456, "learning_rate": 2.9283526851796417e-06, "loss": 0.6105513572692871, "step": 13866 }, { "epoch": 17.01472392638037, "grad_norm": 0.26165157556533813, "learning_rate": 2.925996006047707e-06, "loss": 0.5939786434173584, "step": 13867 }, { "epoch": 17.0159509202454, "grad_norm": 0.258941650390625, "learning_rate": 2.9236402166559123e-06, "loss": 0.5398580431938171, "step": 13868 }, { "epoch": 17.01717791411043, "grad_norm": 0.2444240301847458, "learning_rate": 2.921285317099226e-06, "loss": 0.5112547278404236, "step": 13869 }, { "epoch": 17.01840490797546, "grad_norm": 0.267643541097641, "learning_rate": 2.9189313074725528e-06, "loss": 0.562652587890625, "step": 13870 }, { "epoch": 17.01963190184049, "grad_norm": 0.22871407866477966, "learning_rate": 2.9165781878707803e-06, "loss": 0.4711458683013916, "step": 13871 }, { "epoch": 17.020858895705523, "grad_norm": 0.2618667781352997, "learning_rate": 2.9142259583887584e-06, "loss": 0.5793715119361877, "step": 13872 }, { "epoch": 17.02208588957055, "grad_norm": 0.27021604776382446, "learning_rate": 2.9118746191212963e-06, "loss": 0.6939568519592285, "step": 13873 }, { "epoch": 17.023312883435583, "grad_norm": 0.2705878019332886, "learning_rate": 2.9095241701631776e-06, "loss": 0.6396958827972412, "step": 13874 }, { "epoch": 17.024539877300615, "grad_norm": 0.2538408935070038, "learning_rate": 2.9071746116091313e-06, "loss": 0.4423515200614929, "step": 13875 }, { "epoch": 17.025766871165644, "grad_norm": 0.2306855469942093, "learning_rate": 2.9048259435538654e-06, "loss": 0.4484767019748688, "step": 13876 }, { "epoch": 17.026993865030676, "grad_norm": 0.2550942599773407, "learning_rate": 2.9024781660920457e-06, "loss": 0.5819412469863892, "step": 13877 }, { "epoch": 17.028220858895704, "grad_norm": 0.24638362228870392, "learning_rate": 2.900131279318308e-06, "loss": 0.43201589584350586, "step": 13878 }, { "epoch": 17.029447852760736, "grad_norm": 0.2510530948638916, "learning_rate": 2.8977852833272534e-06, "loss": 0.45210951566696167, "step": 13879 }, { "epoch": 17.030674846625768, "grad_norm": 0.25514525175094604, "learning_rate": 2.895440178213432e-06, "loss": 0.6180754899978638, "step": 13880 }, { "epoch": 17.031901840490796, "grad_norm": 0.26647552847862244, "learning_rate": 2.893095964071374e-06, "loss": 0.7037829160690308, "step": 13881 }, { "epoch": 17.03312883435583, "grad_norm": 0.2525327503681183, "learning_rate": 2.8907526409955645e-06, "loss": 0.710308849811554, "step": 13882 }, { "epoch": 17.03435582822086, "grad_norm": 0.2555586099624634, "learning_rate": 2.888410209080458e-06, "loss": 0.6458879709243774, "step": 13883 }, { "epoch": 17.03558282208589, "grad_norm": 0.2888171672821045, "learning_rate": 2.886068668420475e-06, "loss": 0.5883436799049377, "step": 13884 }, { "epoch": 17.03680981595092, "grad_norm": 0.3374177813529968, "learning_rate": 2.8837280191099937e-06, "loss": 0.6582821011543274, "step": 13885 }, { "epoch": 17.03803680981595, "grad_norm": 0.25231659412384033, "learning_rate": 2.881388261243356e-06, "loss": 0.641211986541748, "step": 13886 }, { "epoch": 17.03926380368098, "grad_norm": 0.24996615946292877, "learning_rate": 2.879049394914873e-06, "loss": 0.6054675579071045, "step": 13887 }, { "epoch": 17.040490797546013, "grad_norm": 0.2523311972618103, "learning_rate": 2.8767114202188194e-06, "loss": 0.32032376527786255, "step": 13888 }, { "epoch": 17.041717791411042, "grad_norm": 0.2579764425754547, "learning_rate": 2.8743743372494274e-06, "loss": 0.6235334277153015, "step": 13889 }, { "epoch": 17.042944785276074, "grad_norm": 0.27568820118904114, "learning_rate": 2.8720381461009056e-06, "loss": 0.543865442276001, "step": 13890 }, { "epoch": 17.044171779141106, "grad_norm": 0.27816149592399597, "learning_rate": 2.8697028468674115e-06, "loss": 0.48786690831184387, "step": 13891 }, { "epoch": 17.045398773006134, "grad_norm": 0.2457149773836136, "learning_rate": 2.8673684396430782e-06, "loss": 0.531576931476593, "step": 13892 }, { "epoch": 17.046625766871166, "grad_norm": 0.29163670539855957, "learning_rate": 2.8650349245220033e-06, "loss": 0.6749392747879028, "step": 13893 }, { "epoch": 17.047852760736195, "grad_norm": 0.29776912927627563, "learning_rate": 2.862702301598241e-06, "loss": 0.6577038764953613, "step": 13894 }, { "epoch": 17.049079754601227, "grad_norm": 0.26423972845077515, "learning_rate": 2.8603705709658064e-06, "loss": 0.602951169013977, "step": 13895 }, { "epoch": 17.05030674846626, "grad_norm": 0.3001272976398468, "learning_rate": 2.85803973271869e-06, "loss": 0.3152523636817932, "step": 13896 }, { "epoch": 17.051533742331287, "grad_norm": 0.2555345296859741, "learning_rate": 2.8557097869508417e-06, "loss": 0.7038553953170776, "step": 13897 }, { "epoch": 17.05276073619632, "grad_norm": 0.22164253890514374, "learning_rate": 2.853380733756175e-06, "loss": 0.5561801195144653, "step": 13898 }, { "epoch": 17.05398773006135, "grad_norm": 0.24740667641162872, "learning_rate": 2.8510525732285713e-06, "loss": 0.3868224024772644, "step": 13899 }, { "epoch": 17.05521472392638, "grad_norm": 0.21956279873847961, "learning_rate": 2.8487253054618635e-06, "loss": 0.48617619276046753, "step": 13900 }, { "epoch": 17.05644171779141, "grad_norm": 0.24428348243236542, "learning_rate": 2.8463989305498596e-06, "loss": 0.6227357387542725, "step": 13901 }, { "epoch": 17.05766871165644, "grad_norm": 0.24012306332588196, "learning_rate": 2.8440734485863317e-06, "loss": 0.43659043312072754, "step": 13902 }, { "epoch": 17.058895705521472, "grad_norm": 0.24568866193294525, "learning_rate": 2.8417488596650112e-06, "loss": 0.5900707840919495, "step": 13903 }, { "epoch": 17.060122699386504, "grad_norm": 0.3366055488586426, "learning_rate": 2.839425163879603e-06, "loss": 0.7083948254585266, "step": 13904 }, { "epoch": 17.061349693251532, "grad_norm": 0.27815163135528564, "learning_rate": 2.8371023613237566e-06, "loss": 0.7006757259368896, "step": 13905 }, { "epoch": 17.062576687116565, "grad_norm": 0.24466396868228912, "learning_rate": 2.834780452091104e-06, "loss": 0.4840347170829773, "step": 13906 }, { "epoch": 17.063803680981597, "grad_norm": 0.2753566801548004, "learning_rate": 2.832459436275234e-06, "loss": 0.7862211465835571, "step": 13907 }, { "epoch": 17.065030674846625, "grad_norm": 0.3026619255542755, "learning_rate": 2.8301393139697015e-06, "loss": 0.7132909893989563, "step": 13908 }, { "epoch": 17.066257668711657, "grad_norm": 0.29557088017463684, "learning_rate": 2.8278200852680264e-06, "loss": 0.7003825902938843, "step": 13909 }, { "epoch": 17.067484662576685, "grad_norm": 0.2728176712989807, "learning_rate": 2.8255017502636814e-06, "loss": 0.4656066596508026, "step": 13910 }, { "epoch": 17.068711656441717, "grad_norm": 0.2544122040271759, "learning_rate": 2.82318430905012e-06, "loss": 0.5611460208892822, "step": 13911 }, { "epoch": 17.06993865030675, "grad_norm": 0.271610289812088, "learning_rate": 2.8208677617207414e-06, "loss": 0.6712468266487122, "step": 13912 }, { "epoch": 17.071165644171778, "grad_norm": 0.24542735517024994, "learning_rate": 2.8185521083689353e-06, "loss": 0.6329667568206787, "step": 13913 }, { "epoch": 17.07239263803681, "grad_norm": 0.2439097911119461, "learning_rate": 2.816237349088027e-06, "loss": 0.5409891605377197, "step": 13914 }, { "epoch": 17.073619631901842, "grad_norm": 0.2701896131038666, "learning_rate": 2.813923483971323e-06, "loss": 0.704211950302124, "step": 13915 }, { "epoch": 17.07484662576687, "grad_norm": 0.2880416810512543, "learning_rate": 2.811610513112084e-06, "loss": 0.5415267944335938, "step": 13916 }, { "epoch": 17.076073619631902, "grad_norm": 0.2667291462421417, "learning_rate": 2.8092984366035412e-06, "loss": 0.5455566644668579, "step": 13917 }, { "epoch": 17.07730061349693, "grad_norm": 0.27530357241630554, "learning_rate": 2.806987254538895e-06, "loss": 0.6494063138961792, "step": 13918 }, { "epoch": 17.078527607361963, "grad_norm": 0.23975853621959686, "learning_rate": 2.804676967011291e-06, "loss": 0.4869493842124939, "step": 13919 }, { "epoch": 17.079754601226995, "grad_norm": 0.3020276427268982, "learning_rate": 2.802367574113857e-06, "loss": 0.5732480883598328, "step": 13920 }, { "epoch": 17.080981595092023, "grad_norm": 0.2844080924987793, "learning_rate": 2.8000590759396733e-06, "loss": 0.567147433757782, "step": 13921 }, { "epoch": 17.082208588957055, "grad_norm": 0.27537262439727783, "learning_rate": 2.797751472581794e-06, "loss": 0.646065890789032, "step": 13922 }, { "epoch": 17.083435582822087, "grad_norm": 0.260479211807251, "learning_rate": 2.795444764133234e-06, "loss": 0.41286033391952515, "step": 13923 }, { "epoch": 17.084662576687116, "grad_norm": 0.29493439197540283, "learning_rate": 2.7931389506869632e-06, "loss": 0.5842751860618591, "step": 13924 }, { "epoch": 17.085889570552148, "grad_norm": 0.24333162605762482, "learning_rate": 2.7908340323359233e-06, "loss": 0.6108719110488892, "step": 13925 }, { "epoch": 17.08711656441718, "grad_norm": 0.2920682728290558, "learning_rate": 2.788530009173021e-06, "loss": 0.7976217865943909, "step": 13926 }, { "epoch": 17.088343558282208, "grad_norm": 0.2199944704771042, "learning_rate": 2.786226881291126e-06, "loss": 0.39757251739501953, "step": 13927 }, { "epoch": 17.08957055214724, "grad_norm": 0.24623841047286987, "learning_rate": 2.7839246487830762e-06, "loss": 0.5202158093452454, "step": 13928 }, { "epoch": 17.09079754601227, "grad_norm": 0.24645490944385529, "learning_rate": 2.7816233117416546e-06, "loss": 0.6362736225128174, "step": 13929 }, { "epoch": 17.0920245398773, "grad_norm": 0.2728424668312073, "learning_rate": 2.779322870259626e-06, "loss": 0.4953644871711731, "step": 13930 }, { "epoch": 17.093251533742333, "grad_norm": 0.2739006280899048, "learning_rate": 2.777023324429723e-06, "loss": 0.6691982746124268, "step": 13931 }, { "epoch": 17.09447852760736, "grad_norm": 0.26969921588897705, "learning_rate": 2.7747246743446228e-06, "loss": 0.6047170758247375, "step": 13932 }, { "epoch": 17.095705521472393, "grad_norm": 0.22430957853794098, "learning_rate": 2.7724269200969905e-06, "loss": 0.574451208114624, "step": 13933 }, { "epoch": 17.096932515337425, "grad_norm": 0.23357443511486053, "learning_rate": 2.770130061779422e-06, "loss": 0.4192257821559906, "step": 13934 }, { "epoch": 17.098159509202453, "grad_norm": 0.24083419144153595, "learning_rate": 2.767834099484515e-06, "loss": 0.5505349636077881, "step": 13935 }, { "epoch": 17.099386503067485, "grad_norm": 0.29817962646484375, "learning_rate": 2.7655390333048066e-06, "loss": 0.5081351399421692, "step": 13936 }, { "epoch": 17.100613496932514, "grad_norm": 0.25647857785224915, "learning_rate": 2.763244863332812e-06, "loss": 0.46267345547676086, "step": 13937 }, { "epoch": 17.101840490797546, "grad_norm": 0.2573675811290741, "learning_rate": 2.76095158966099e-06, "loss": 0.605611264705658, "step": 13938 }, { "epoch": 17.103067484662578, "grad_norm": 0.24602706730365753, "learning_rate": 2.7586592123817813e-06, "loss": 0.5574156045913696, "step": 13939 }, { "epoch": 17.104294478527606, "grad_norm": 0.2798295319080353, "learning_rate": 2.7563677315875863e-06, "loss": 0.7397669553756714, "step": 13940 }, { "epoch": 17.10552147239264, "grad_norm": 0.2602495551109314, "learning_rate": 2.7540771473707648e-06, "loss": 0.5910102128982544, "step": 13941 }, { "epoch": 17.10674846625767, "grad_norm": 0.2589232325553894, "learning_rate": 2.7517874598236544e-06, "loss": 0.5382744073867798, "step": 13942 }, { "epoch": 17.1079754601227, "grad_norm": 0.26799437403678894, "learning_rate": 2.7494986690385305e-06, "loss": 0.4095979332923889, "step": 13943 }, { "epoch": 17.10920245398773, "grad_norm": 0.2865317463874817, "learning_rate": 2.7472107751076557e-06, "loss": 0.6805552244186401, "step": 13944 }, { "epoch": 17.11042944785276, "grad_norm": 0.31856217980384827, "learning_rate": 2.7449237781232457e-06, "loss": 0.7946807146072388, "step": 13945 }, { "epoch": 17.11165644171779, "grad_norm": 0.38888558745384216, "learning_rate": 2.742637678177487e-06, "loss": 0.6208178997039795, "step": 13946 }, { "epoch": 17.112883435582823, "grad_norm": 0.27597737312316895, "learning_rate": 2.7403524753625285e-06, "loss": 0.5532024502754211, "step": 13947 }, { "epoch": 17.11411042944785, "grad_norm": 0.23957785964012146, "learning_rate": 2.738068169770469e-06, "loss": 0.6324502229690552, "step": 13948 }, { "epoch": 17.115337423312884, "grad_norm": 0.30252301692962646, "learning_rate": 2.735784761493387e-06, "loss": 0.8375701904296875, "step": 13949 }, { "epoch": 17.116564417177916, "grad_norm": 0.26276618242263794, "learning_rate": 2.7335022506233265e-06, "loss": 0.5590027570724487, "step": 13950 }, { "epoch": 17.117791411042944, "grad_norm": 0.2876454293727875, "learning_rate": 2.73122063725228e-06, "loss": 0.656793475151062, "step": 13951 }, { "epoch": 17.119018404907976, "grad_norm": 0.25187548995018005, "learning_rate": 2.7289399214722235e-06, "loss": 0.4343380331993103, "step": 13952 }, { "epoch": 17.120245398773005, "grad_norm": 0.25467729568481445, "learning_rate": 2.7266601033750754e-06, "loss": 0.4449816942214966, "step": 13953 }, { "epoch": 17.121472392638037, "grad_norm": 0.24554435908794403, "learning_rate": 2.724381183052732e-06, "loss": 0.5720722675323486, "step": 13954 }, { "epoch": 17.12269938650307, "grad_norm": 0.2643505930900574, "learning_rate": 2.7221031605970504e-06, "loss": 0.43029749393463135, "step": 13955 }, { "epoch": 17.123926380368097, "grad_norm": 0.27282246947288513, "learning_rate": 2.719826036099854e-06, "loss": 0.7172949314117432, "step": 13956 }, { "epoch": 17.12515337423313, "grad_norm": 0.28202974796295166, "learning_rate": 2.717549809652925e-06, "loss": 0.46908941864967346, "step": 13957 }, { "epoch": 17.12638036809816, "grad_norm": 0.25581517815589905, "learning_rate": 2.7152744813480096e-06, "loss": 0.42680084705352783, "step": 13958 }, { "epoch": 17.12760736196319, "grad_norm": 0.2613404095172882, "learning_rate": 2.7130000512768235e-06, "loss": 0.5793277621269226, "step": 13959 }, { "epoch": 17.12883435582822, "grad_norm": 0.26003792881965637, "learning_rate": 2.71072651953104e-06, "loss": 0.6889005303382874, "step": 13960 }, { "epoch": 17.13006134969325, "grad_norm": 0.2602825462818146, "learning_rate": 2.708453886202303e-06, "loss": 0.6396095752716064, "step": 13961 }, { "epoch": 17.131288343558282, "grad_norm": 0.25905364751815796, "learning_rate": 2.706182151382211e-06, "loss": 0.5212470293045044, "step": 13962 }, { "epoch": 17.132515337423314, "grad_norm": 0.24799074232578278, "learning_rate": 2.70391131516233e-06, "loss": 0.4417160451412201, "step": 13963 }, { "epoch": 17.133742331288342, "grad_norm": 0.3309270143508911, "learning_rate": 2.7016413776341924e-06, "loss": 0.7084729671478271, "step": 13964 }, { "epoch": 17.134969325153374, "grad_norm": 0.2633299231529236, "learning_rate": 2.699372338889297e-06, "loss": 0.556012749671936, "step": 13965 }, { "epoch": 17.136196319018406, "grad_norm": 0.2731941044330597, "learning_rate": 2.6971041990191015e-06, "loss": 0.6788474917411804, "step": 13966 }, { "epoch": 17.137423312883435, "grad_norm": 0.2943129241466522, "learning_rate": 2.694836958115024e-06, "loss": 0.7646350860595703, "step": 13967 }, { "epoch": 17.138650306748467, "grad_norm": 0.25365617871284485, "learning_rate": 2.69257061626845e-06, "loss": 0.6179028749465942, "step": 13968 }, { "epoch": 17.139877300613495, "grad_norm": 0.26798805594444275, "learning_rate": 2.690305173570731e-06, "loss": 0.7007835507392883, "step": 13969 }, { "epoch": 17.141104294478527, "grad_norm": 0.23868346214294434, "learning_rate": 2.6880406301131836e-06, "loss": 0.677018940448761, "step": 13970 }, { "epoch": 17.14233128834356, "grad_norm": 0.2854723036289215, "learning_rate": 2.685776985987085e-06, "loss": 0.6751343011856079, "step": 13971 }, { "epoch": 17.143558282208588, "grad_norm": 0.3214074671268463, "learning_rate": 2.683514241283672e-06, "loss": 0.788837194442749, "step": 13972 }, { "epoch": 17.14478527607362, "grad_norm": 0.23953834176063538, "learning_rate": 2.68125239609415e-06, "loss": 0.4977913498878479, "step": 13973 }, { "epoch": 17.14601226993865, "grad_norm": 0.26228803396224976, "learning_rate": 2.678991450509688e-06, "loss": 0.49157002568244934, "step": 13974 }, { "epoch": 17.14723926380368, "grad_norm": 0.2703620195388794, "learning_rate": 2.6767314046214187e-06, "loss": 0.3661012053489685, "step": 13975 }, { "epoch": 17.148466257668712, "grad_norm": 0.278713196516037, "learning_rate": 2.674472258520444e-06, "loss": 0.7304446697235107, "step": 13976 }, { "epoch": 17.14969325153374, "grad_norm": 0.26773059368133545, "learning_rate": 2.6722140122978164e-06, "loss": 0.6489770412445068, "step": 13977 }, { "epoch": 17.150920245398773, "grad_norm": 0.2664966583251953, "learning_rate": 2.669956666044557e-06, "loss": 0.7048895359039307, "step": 13978 }, { "epoch": 17.152147239263805, "grad_norm": 0.2936203181743622, "learning_rate": 2.6677002198516543e-06, "loss": 0.7268843650817871, "step": 13979 }, { "epoch": 17.153374233128833, "grad_norm": 0.2539674639701843, "learning_rate": 2.665444673810072e-06, "loss": 0.7123627662658691, "step": 13980 }, { "epoch": 17.154601226993865, "grad_norm": 0.25633513927459717, "learning_rate": 2.663190028010709e-06, "loss": 0.6057264804840088, "step": 13981 }, { "epoch": 17.155828220858897, "grad_norm": 0.2588885724544525, "learning_rate": 2.660936282544449e-06, "loss": 0.539522647857666, "step": 13982 }, { "epoch": 17.157055214723925, "grad_norm": 0.2828221917152405, "learning_rate": 2.658683437502135e-06, "loss": 0.7286623120307922, "step": 13983 }, { "epoch": 17.158282208588957, "grad_norm": 0.27598366141319275, "learning_rate": 2.6564314929745703e-06, "loss": 0.6558487415313721, "step": 13984 }, { "epoch": 17.15950920245399, "grad_norm": 0.2696853578090668, "learning_rate": 2.6541804490525347e-06, "loss": 0.596808910369873, "step": 13985 }, { "epoch": 17.160736196319018, "grad_norm": 0.2850160300731659, "learning_rate": 2.651930305826747e-06, "loss": 0.6316828727722168, "step": 13986 }, { "epoch": 17.16196319018405, "grad_norm": 0.2557978928089142, "learning_rate": 2.649681063387907e-06, "loss": 0.39617031812667847, "step": 13987 }, { "epoch": 17.16319018404908, "grad_norm": 0.28541240096092224, "learning_rate": 2.6474327218266816e-06, "loss": 0.49982351064682007, "step": 13988 }, { "epoch": 17.16441717791411, "grad_norm": 0.2530246078968048, "learning_rate": 2.645185281233689e-06, "loss": 0.5462325811386108, "step": 13989 }, { "epoch": 17.165644171779142, "grad_norm": 0.23542168736457825, "learning_rate": 2.6429387416995217e-06, "loss": 0.4151510000228882, "step": 13990 }, { "epoch": 17.16687116564417, "grad_norm": 0.3003842532634735, "learning_rate": 2.640693103314734e-06, "loss": 0.6828837394714355, "step": 13991 }, { "epoch": 17.168098159509203, "grad_norm": 0.2528543472290039, "learning_rate": 2.638448366169832e-06, "loss": 0.510848879814148, "step": 13992 }, { "epoch": 17.169325153374235, "grad_norm": 0.2746555507183075, "learning_rate": 2.636204530355299e-06, "loss": 0.7528077960014343, "step": 13993 }, { "epoch": 17.170552147239263, "grad_norm": 0.2507275342941284, "learning_rate": 2.6339615959615787e-06, "loss": 0.5585680603981018, "step": 13994 }, { "epoch": 17.171779141104295, "grad_norm": 0.26022541522979736, "learning_rate": 2.631719563079074e-06, "loss": 0.6687304973602295, "step": 13995 }, { "epoch": 17.173006134969324, "grad_norm": 0.26747527718544006, "learning_rate": 2.6294784317981657e-06, "loss": 0.7407108545303345, "step": 13996 }, { "epoch": 17.174233128834356, "grad_norm": 0.2755427062511444, "learning_rate": 2.62723820220917e-06, "loss": 0.5069385170936584, "step": 13997 }, { "epoch": 17.175460122699388, "grad_norm": 0.2740311324596405, "learning_rate": 2.6249988744023985e-06, "loss": 0.6152913570404053, "step": 13998 }, { "epoch": 17.176687116564416, "grad_norm": 0.2739373445510864, "learning_rate": 2.6227604484681032e-06, "loss": 0.4042765498161316, "step": 13999 }, { "epoch": 17.177914110429448, "grad_norm": 0.2394448071718216, "learning_rate": 2.620522924496513e-06, "loss": 0.5079926252365112, "step": 14000 }, { "epoch": 17.17914110429448, "grad_norm": 0.29038113355636597, "learning_rate": 2.618286302577816e-06, "loss": 0.6555874943733215, "step": 14001 }, { "epoch": 17.18036809815951, "grad_norm": 0.24432389438152313, "learning_rate": 2.6160505828021626e-06, "loss": 0.4364728033542633, "step": 14002 }, { "epoch": 17.18159509202454, "grad_norm": 0.2429778277873993, "learning_rate": 2.6138157652596696e-06, "loss": 0.4376307725906372, "step": 14003 }, { "epoch": 17.18282208588957, "grad_norm": 0.2903517782688141, "learning_rate": 2.611581850040415e-06, "loss": 0.4969863295555115, "step": 14004 }, { "epoch": 17.1840490797546, "grad_norm": 0.28697526454925537, "learning_rate": 2.6093488372344487e-06, "loss": 0.3958936035633087, "step": 14005 }, { "epoch": 17.185276073619633, "grad_norm": 0.24295121431350708, "learning_rate": 2.6071167269317632e-06, "loss": 0.48119592666625977, "step": 14006 }, { "epoch": 17.18650306748466, "grad_norm": 0.24395312368869781, "learning_rate": 2.604885519222339e-06, "loss": 0.4221930503845215, "step": 14007 }, { "epoch": 17.187730061349694, "grad_norm": 0.25167134404182434, "learning_rate": 2.6026552141961034e-06, "loss": 0.5400319695472717, "step": 14008 }, { "epoch": 17.188957055214726, "grad_norm": 0.2921982407569885, "learning_rate": 2.600425811942961e-06, "loss": 0.6216886639595032, "step": 14009 }, { "epoch": 17.190184049079754, "grad_norm": 0.27703121304512024, "learning_rate": 2.5981973125527694e-06, "loss": 0.6734843254089355, "step": 14010 }, { "epoch": 17.191411042944786, "grad_norm": 0.2717866897583008, "learning_rate": 2.595969716115351e-06, "loss": 0.3673505187034607, "step": 14011 }, { "epoch": 17.192638036809814, "grad_norm": 0.2620726227760315, "learning_rate": 2.5937430227204933e-06, "loss": 0.6362513303756714, "step": 14012 }, { "epoch": 17.193865030674846, "grad_norm": 0.2783670723438263, "learning_rate": 2.591517232457949e-06, "loss": 0.5685480833053589, "step": 14013 }, { "epoch": 17.19509202453988, "grad_norm": 0.26011958718299866, "learning_rate": 2.589292345417435e-06, "loss": 0.49102404713630676, "step": 14014 }, { "epoch": 17.196319018404907, "grad_norm": 0.2867174446582794, "learning_rate": 2.5870683616886328e-06, "loss": 0.7665981650352478, "step": 14015 }, { "epoch": 17.19754601226994, "grad_norm": 0.21483144164085388, "learning_rate": 2.5848452813611783e-06, "loss": 0.25125694274902344, "step": 14016 }, { "epoch": 17.19877300613497, "grad_norm": 0.2872703969478607, "learning_rate": 2.5826231045246785e-06, "loss": 0.6846376657485962, "step": 14017 }, { "epoch": 17.2, "grad_norm": 0.2803758978843689, "learning_rate": 2.5804018312687054e-06, "loss": 0.4978848695755005, "step": 14018 }, { "epoch": 17.20122699386503, "grad_norm": 0.2746785283088684, "learning_rate": 2.578181461682794e-06, "loss": 0.48576512932777405, "step": 14019 }, { "epoch": 17.20245398773006, "grad_norm": 0.24277429282665253, "learning_rate": 2.575961995856441e-06, "loss": 0.40379253029823303, "step": 14020 }, { "epoch": 17.20368098159509, "grad_norm": 0.267910361289978, "learning_rate": 2.5737434338791e-06, "loss": 0.5956786870956421, "step": 14021 }, { "epoch": 17.204907975460124, "grad_norm": 0.2900562286376953, "learning_rate": 2.5715257758402002e-06, "loss": 0.5949351191520691, "step": 14022 }, { "epoch": 17.206134969325152, "grad_norm": 0.2859283983707428, "learning_rate": 2.569309021829125e-06, "loss": 0.6571930050849915, "step": 14023 }, { "epoch": 17.207361963190184, "grad_norm": 0.2571480870246887, "learning_rate": 2.5670931719352365e-06, "loss": 0.49887844920158386, "step": 14024 }, { "epoch": 17.208588957055216, "grad_norm": 0.25081387162208557, "learning_rate": 2.5648782262478382e-06, "loss": 0.5352137088775635, "step": 14025 }, { "epoch": 17.209815950920245, "grad_norm": 0.23818278312683105, "learning_rate": 2.562664184856209e-06, "loss": 0.4513722062110901, "step": 14026 }, { "epoch": 17.211042944785277, "grad_norm": 0.2596226632595062, "learning_rate": 2.5604510478495964e-06, "loss": 0.61190265417099, "step": 14027 }, { "epoch": 17.212269938650305, "grad_norm": 0.28486064076423645, "learning_rate": 2.5582388153172014e-06, "loss": 0.48273926973342896, "step": 14028 }, { "epoch": 17.213496932515337, "grad_norm": 0.2546261250972748, "learning_rate": 2.5560274873481975e-06, "loss": 0.3790125250816345, "step": 14029 }, { "epoch": 17.21472392638037, "grad_norm": 0.25884708762168884, "learning_rate": 2.5538170640317104e-06, "loss": 0.43711572885513306, "step": 14030 }, { "epoch": 17.215950920245398, "grad_norm": 0.23942896723747253, "learning_rate": 2.5516075454568383e-06, "loss": 0.4507067799568176, "step": 14031 }, { "epoch": 17.21717791411043, "grad_norm": 0.2572181522846222, "learning_rate": 2.5493989317126403e-06, "loss": 0.5740071535110474, "step": 14032 }, { "epoch": 17.21840490797546, "grad_norm": 0.27310284972190857, "learning_rate": 2.54719122288814e-06, "loss": 0.6670235991477966, "step": 14033 }, { "epoch": 17.21963190184049, "grad_norm": 0.25602278113365173, "learning_rate": 2.54498441907233e-06, "loss": 0.5722750425338745, "step": 14034 }, { "epoch": 17.220858895705522, "grad_norm": 0.26446637511253357, "learning_rate": 2.54277852035415e-06, "loss": 0.6973157525062561, "step": 14035 }, { "epoch": 17.22208588957055, "grad_norm": 0.27758342027664185, "learning_rate": 2.540573526822515e-06, "loss": 0.6407877206802368, "step": 14036 }, { "epoch": 17.223312883435582, "grad_norm": 0.26164934039115906, "learning_rate": 2.538369438566307e-06, "loss": 0.553597092628479, "step": 14037 }, { "epoch": 17.224539877300614, "grad_norm": 0.25523149967193604, "learning_rate": 2.5361662556743658e-06, "loss": 0.6432138681411743, "step": 14038 }, { "epoch": 17.225766871165643, "grad_norm": 0.2609499990940094, "learning_rate": 2.5339639782354957e-06, "loss": 0.5348799824714661, "step": 14039 }, { "epoch": 17.226993865030675, "grad_norm": 0.8181317448616028, "learning_rate": 2.5317626063384614e-06, "loss": 0.6440224647521973, "step": 14040 }, { "epoch": 17.228220858895707, "grad_norm": 0.2514221668243408, "learning_rate": 2.529562140071992e-06, "loss": 0.5863845348358154, "step": 14041 }, { "epoch": 17.229447852760735, "grad_norm": 0.24825331568717957, "learning_rate": 2.527362579524786e-06, "loss": 0.5377068519592285, "step": 14042 }, { "epoch": 17.230674846625767, "grad_norm": 0.25490230321884155, "learning_rate": 2.5251639247855006e-06, "loss": 0.3283660411834717, "step": 14043 }, { "epoch": 17.2319018404908, "grad_norm": 0.23091299831867218, "learning_rate": 2.522966175942762e-06, "loss": 0.5177837610244751, "step": 14044 }, { "epoch": 17.233128834355828, "grad_norm": 0.2613988220691681, "learning_rate": 2.5207693330851405e-06, "loss": 0.47478532791137695, "step": 14045 }, { "epoch": 17.23435582822086, "grad_norm": 0.2814788818359375, "learning_rate": 2.5185733963011994e-06, "loss": 0.7107734084129333, "step": 14046 }, { "epoch": 17.235582822085888, "grad_norm": 0.27056288719177246, "learning_rate": 2.516378365679445e-06, "loss": 0.6172555685043335, "step": 14047 }, { "epoch": 17.23680981595092, "grad_norm": 0.26199308037757874, "learning_rate": 2.5141842413083593e-06, "loss": 0.6171200275421143, "step": 14048 }, { "epoch": 17.238036809815952, "grad_norm": 0.21934087574481964, "learning_rate": 2.5119910232763695e-06, "loss": 0.430976927280426, "step": 14049 }, { "epoch": 17.23926380368098, "grad_norm": 0.28617942333221436, "learning_rate": 2.5097987116718845e-06, "loss": 0.6836156845092773, "step": 14050 }, { "epoch": 17.240490797546013, "grad_norm": 0.27850988507270813, "learning_rate": 2.5076073065832677e-06, "loss": 0.6908947229385376, "step": 14051 }, { "epoch": 17.241717791411045, "grad_norm": 0.2827862799167633, "learning_rate": 2.505416808098851e-06, "loss": 0.7500824928283691, "step": 14052 }, { "epoch": 17.242944785276073, "grad_norm": 0.2826976478099823, "learning_rate": 2.503227216306933e-06, "loss": 0.5592548847198486, "step": 14053 }, { "epoch": 17.244171779141105, "grad_norm": 0.3022567331790924, "learning_rate": 2.5010385312957547e-06, "loss": 0.6697362661361694, "step": 14054 }, { "epoch": 17.245398773006134, "grad_norm": 0.25340503454208374, "learning_rate": 2.4988507531535475e-06, "loss": 0.5521824955940247, "step": 14055 }, { "epoch": 17.246625766871166, "grad_norm": 0.24769140779972076, "learning_rate": 2.496663881968489e-06, "loss": 0.6366980075836182, "step": 14056 }, { "epoch": 17.247852760736198, "grad_norm": 0.2635798454284668, "learning_rate": 2.4944779178287276e-06, "loss": 0.5380570888519287, "step": 14057 }, { "epoch": 17.249079754601226, "grad_norm": 0.2269655168056488, "learning_rate": 2.4922928608223815e-06, "loss": 0.30944010615348816, "step": 14058 }, { "epoch": 17.250306748466258, "grad_norm": 0.2487349659204483, "learning_rate": 2.4901087110375088e-06, "loss": 0.7381583452224731, "step": 14059 }, { "epoch": 17.25153374233129, "grad_norm": 0.2794586420059204, "learning_rate": 2.487925468562152e-06, "loss": 0.5175014138221741, "step": 14060 }, { "epoch": 17.25276073619632, "grad_norm": 0.2681513726711273, "learning_rate": 2.4857431334843168e-06, "loss": 0.573009729385376, "step": 14061 }, { "epoch": 17.25398773006135, "grad_norm": 0.25059276819229126, "learning_rate": 2.4835617058919627e-06, "loss": 0.6183465719223022, "step": 14062 }, { "epoch": 17.25521472392638, "grad_norm": 0.24931471049785614, "learning_rate": 2.481381185873019e-06, "loss": 0.49548906087875366, "step": 14063 }, { "epoch": 17.25644171779141, "grad_norm": 0.24352380633354187, "learning_rate": 2.4792015735153747e-06, "loss": 0.5853252410888672, "step": 14064 }, { "epoch": 17.257668711656443, "grad_norm": 0.3013039231300354, "learning_rate": 2.477022868906881e-06, "loss": 0.6254978179931641, "step": 14065 }, { "epoch": 17.25889570552147, "grad_norm": 0.26183146238327026, "learning_rate": 2.4748450721353567e-06, "loss": 0.5190117955207825, "step": 14066 }, { "epoch": 17.260122699386503, "grad_norm": 0.23902291059494019, "learning_rate": 2.4726681832885845e-06, "loss": 0.42637717723846436, "step": 14067 }, { "epoch": 17.261349693251535, "grad_norm": 0.28445664048194885, "learning_rate": 2.470492202454308e-06, "loss": 0.6525983810424805, "step": 14068 }, { "epoch": 17.262576687116564, "grad_norm": 0.2621351182460785, "learning_rate": 2.468317129720235e-06, "loss": 0.6272627711296082, "step": 14069 }, { "epoch": 17.263803680981596, "grad_norm": 0.2236528992652893, "learning_rate": 2.4661429651740368e-06, "loss": 0.47021105885505676, "step": 14070 }, { "epoch": 17.265030674846624, "grad_norm": 0.2831626236438751, "learning_rate": 2.4639697089033465e-06, "loss": 0.59941565990448, "step": 14071 }, { "epoch": 17.266257668711656, "grad_norm": 0.28856366872787476, "learning_rate": 2.4617973609957656e-06, "loss": 0.7405005097389221, "step": 14072 }, { "epoch": 17.26748466257669, "grad_norm": 0.2856823205947876, "learning_rate": 2.4596259215388494e-06, "loss": 0.7936815023422241, "step": 14073 }, { "epoch": 17.268711656441717, "grad_norm": 0.2202194333076477, "learning_rate": 2.4574553906201255e-06, "loss": 0.5124350190162659, "step": 14074 }, { "epoch": 17.26993865030675, "grad_norm": 0.3006584942340851, "learning_rate": 2.4552857683270814e-06, "loss": 0.5732966661453247, "step": 14075 }, { "epoch": 17.27116564417178, "grad_norm": 0.2729976177215576, "learning_rate": 2.453117054747167e-06, "loss": 0.6774337291717529, "step": 14076 }, { "epoch": 17.27239263803681, "grad_norm": 0.2496338039636612, "learning_rate": 2.4509492499678045e-06, "loss": 0.654494047164917, "step": 14077 }, { "epoch": 17.27361963190184, "grad_norm": 0.26426902413368225, "learning_rate": 2.4487823540763595e-06, "loss": 0.6133080720901489, "step": 14078 }, { "epoch": 17.27484662576687, "grad_norm": 0.2745043933391571, "learning_rate": 2.4466163671601788e-06, "loss": 0.5665345191955566, "step": 14079 }, { "epoch": 17.2760736196319, "grad_norm": 0.2634892165660858, "learning_rate": 2.4444512893065707e-06, "loss": 0.49946653842926025, "step": 14080 }, { "epoch": 17.277300613496934, "grad_norm": 0.2680230438709259, "learning_rate": 2.4422871206028013e-06, "loss": 0.5856625437736511, "step": 14081 }, { "epoch": 17.278527607361962, "grad_norm": 0.3775702714920044, "learning_rate": 2.440123861136104e-06, "loss": 0.7326853275299072, "step": 14082 }, { "epoch": 17.279754601226994, "grad_norm": 0.27599814534187317, "learning_rate": 2.4379615109936663e-06, "loss": 0.6979066133499146, "step": 14083 }, { "epoch": 17.280981595092026, "grad_norm": 0.24499300122261047, "learning_rate": 2.435800070262653e-06, "loss": 0.5789518356323242, "step": 14084 }, { "epoch": 17.282208588957054, "grad_norm": 0.28458940982818604, "learning_rate": 2.433639539030183e-06, "loss": 0.5009638667106628, "step": 14085 }, { "epoch": 17.283435582822086, "grad_norm": 0.3089331090450287, "learning_rate": 2.431479917383339e-06, "loss": 0.7651395797729492, "step": 14086 }, { "epoch": 17.284662576687115, "grad_norm": 0.32442566752433777, "learning_rate": 2.429321205409177e-06, "loss": 0.756152331829071, "step": 14087 }, { "epoch": 17.285889570552147, "grad_norm": 0.2494388073682785, "learning_rate": 2.4271634031947042e-06, "loss": 0.5939623117446899, "step": 14088 }, { "epoch": 17.28711656441718, "grad_norm": 0.2742099165916443, "learning_rate": 2.4250065108268937e-06, "loss": 0.5987116098403931, "step": 14089 }, { "epoch": 17.288343558282207, "grad_norm": 0.2562132775783539, "learning_rate": 2.4228505283926784e-06, "loss": 0.6856917142868042, "step": 14090 }, { "epoch": 17.28957055214724, "grad_norm": 0.2655978202819824, "learning_rate": 2.420695455978969e-06, "loss": 0.780966579914093, "step": 14091 }, { "epoch": 17.29079754601227, "grad_norm": 0.2361258864402771, "learning_rate": 2.418541293672635e-06, "loss": 0.5503898859024048, "step": 14092 }, { "epoch": 17.2920245398773, "grad_norm": 0.2621472179889679, "learning_rate": 2.416388041560491e-06, "loss": 0.5185408592224121, "step": 14093 }, { "epoch": 17.293251533742332, "grad_norm": 0.27832460403442383, "learning_rate": 2.4142356997293366e-06, "loss": 0.6456698179244995, "step": 14094 }, { "epoch": 17.29447852760736, "grad_norm": 0.2730938494205475, "learning_rate": 2.4120842682659237e-06, "loss": 0.6056338548660278, "step": 14095 }, { "epoch": 17.295705521472392, "grad_norm": 0.27178317308425903, "learning_rate": 2.4099337472569704e-06, "loss": 0.48659366369247437, "step": 14096 }, { "epoch": 17.296932515337424, "grad_norm": 0.22479696571826935, "learning_rate": 2.4077841367891625e-06, "loss": 0.3640417158603668, "step": 14097 }, { "epoch": 17.298159509202453, "grad_norm": 0.2362067997455597, "learning_rate": 2.4056354369491386e-06, "loss": 0.4483206272125244, "step": 14098 }, { "epoch": 17.299386503067485, "grad_norm": 0.2607211768627167, "learning_rate": 2.4034876478235076e-06, "loss": 0.3307653069496155, "step": 14099 }, { "epoch": 17.300613496932517, "grad_norm": 0.2943900227546692, "learning_rate": 2.401340769498844e-06, "loss": 0.6315137147903442, "step": 14100 }, { "epoch": 17.301840490797545, "grad_norm": 0.26566001772880554, "learning_rate": 2.3991948020616774e-06, "loss": 0.6508532762527466, "step": 14101 }, { "epoch": 17.303067484662577, "grad_norm": 0.28146103024482727, "learning_rate": 2.3970497455985173e-06, "loss": 0.5133554935455322, "step": 14102 }, { "epoch": 17.30429447852761, "grad_norm": 0.26554325222969055, "learning_rate": 2.3949056001958096e-06, "loss": 0.7191063165664673, "step": 14103 }, { "epoch": 17.305521472392638, "grad_norm": 0.26158618927001953, "learning_rate": 2.392762365939985e-06, "loss": 0.7264138460159302, "step": 14104 }, { "epoch": 17.30674846625767, "grad_norm": 0.23621001839637756, "learning_rate": 2.3906200429174325e-06, "loss": 0.6639288663864136, "step": 14105 }, { "epoch": 17.307975460122698, "grad_norm": 0.25526946783065796, "learning_rate": 2.3884786312145024e-06, "loss": 0.5747429132461548, "step": 14106 }, { "epoch": 17.30920245398773, "grad_norm": 0.26311734318733215, "learning_rate": 2.386338130917512e-06, "loss": 0.6719235777854919, "step": 14107 }, { "epoch": 17.310429447852762, "grad_norm": 0.28867825865745544, "learning_rate": 2.384198542112731e-06, "loss": 0.6562351584434509, "step": 14108 }, { "epoch": 17.31165644171779, "grad_norm": 0.2658907175064087, "learning_rate": 2.3820598648864044e-06, "loss": 0.5952780842781067, "step": 14109 }, { "epoch": 17.312883435582823, "grad_norm": 0.24261663854122162, "learning_rate": 2.3799220993247385e-06, "loss": 0.47332265973091125, "step": 14110 }, { "epoch": 17.314110429447855, "grad_norm": 0.2522696852684021, "learning_rate": 2.3777852455138945e-06, "loss": 0.4239191710948944, "step": 14111 }, { "epoch": 17.315337423312883, "grad_norm": 0.2711424231529236, "learning_rate": 2.375649303540009e-06, "loss": 0.6984335780143738, "step": 14112 }, { "epoch": 17.316564417177915, "grad_norm": 0.2408452033996582, "learning_rate": 2.3735142734891745e-06, "loss": 0.6887401938438416, "step": 14113 }, { "epoch": 17.317791411042943, "grad_norm": 0.3064673840999603, "learning_rate": 2.3713801554474447e-06, "loss": 0.6434744596481323, "step": 14114 }, { "epoch": 17.319018404907975, "grad_norm": 0.28817063570022583, "learning_rate": 2.3692469495008418e-06, "loss": 0.592853844165802, "step": 14115 }, { "epoch": 17.320245398773007, "grad_norm": 0.23895983397960663, "learning_rate": 2.3671146557353555e-06, "loss": 0.6107993125915527, "step": 14116 }, { "epoch": 17.321472392638036, "grad_norm": 0.27446985244750977, "learning_rate": 2.3649832742369205e-06, "loss": 0.5447536110877991, "step": 14117 }, { "epoch": 17.322699386503068, "grad_norm": 0.24990591406822205, "learning_rate": 2.3628528050914534e-06, "loss": 0.5874862670898438, "step": 14118 }, { "epoch": 17.3239263803681, "grad_norm": 0.2866199016571045, "learning_rate": 2.3607232483848276e-06, "loss": 0.5626012086868286, "step": 14119 }, { "epoch": 17.32515337423313, "grad_norm": 0.2560909390449524, "learning_rate": 2.3585946042028767e-06, "loss": 0.4875628352165222, "step": 14120 }, { "epoch": 17.32638036809816, "grad_norm": 0.2613809108734131, "learning_rate": 2.3564668726314075e-06, "loss": 0.5875483155250549, "step": 14121 }, { "epoch": 17.32760736196319, "grad_norm": 0.2777126729488373, "learning_rate": 2.354340053756174e-06, "loss": 0.5752010345458984, "step": 14122 }, { "epoch": 17.32883435582822, "grad_norm": 0.29209503531455994, "learning_rate": 2.352214147662904e-06, "loss": 0.49522697925567627, "step": 14123 }, { "epoch": 17.330061349693253, "grad_norm": 0.27508413791656494, "learning_rate": 2.3500891544372875e-06, "loss": 0.812987208366394, "step": 14124 }, { "epoch": 17.33128834355828, "grad_norm": 0.2912757396697998, "learning_rate": 2.347965074164979e-06, "loss": 0.507900059223175, "step": 14125 }, { "epoch": 17.332515337423313, "grad_norm": 0.2605324387550354, "learning_rate": 2.345841906931595e-06, "loss": 0.47681742906570435, "step": 14126 }, { "epoch": 17.333742331288345, "grad_norm": 0.28098180890083313, "learning_rate": 2.3437196528227094e-06, "loss": 0.6769083738327026, "step": 14127 }, { "epoch": 17.334969325153374, "grad_norm": 0.2809142470359802, "learning_rate": 2.341598311923868e-06, "loss": 0.5997875928878784, "step": 14128 }, { "epoch": 17.336196319018406, "grad_norm": 0.2620401084423065, "learning_rate": 2.339477884320573e-06, "loss": 0.7011610865592957, "step": 14129 }, { "epoch": 17.337423312883434, "grad_norm": 0.294161319732666, "learning_rate": 2.337358370098294e-06, "loss": 0.6707825660705566, "step": 14130 }, { "epoch": 17.338650306748466, "grad_norm": 0.3394750952720642, "learning_rate": 2.3352397693424705e-06, "loss": 0.45965689420700073, "step": 14131 }, { "epoch": 17.339877300613498, "grad_norm": 0.30315396189689636, "learning_rate": 2.3331220821384835e-06, "loss": 0.6132648587226868, "step": 14132 }, { "epoch": 17.341104294478527, "grad_norm": 0.2676992118358612, "learning_rate": 2.331005308571696e-06, "loss": 0.6613229513168335, "step": 14133 }, { "epoch": 17.34233128834356, "grad_norm": 0.274837464094162, "learning_rate": 2.328889448727428e-06, "loss": 0.7432881593704224, "step": 14134 }, { "epoch": 17.34355828220859, "grad_norm": 0.260711133480072, "learning_rate": 2.326774502690976e-06, "loss": 0.7694916725158691, "step": 14135 }, { "epoch": 17.34478527607362, "grad_norm": 0.24647700786590576, "learning_rate": 2.324660470547571e-06, "loss": 0.5654014945030212, "step": 14136 }, { "epoch": 17.34601226993865, "grad_norm": 0.23286710679531097, "learning_rate": 2.3225473523824316e-06, "loss": 0.315471351146698, "step": 14137 }, { "epoch": 17.34723926380368, "grad_norm": 0.29065757989883423, "learning_rate": 2.3204351482807307e-06, "loss": 0.6416515111923218, "step": 14138 }, { "epoch": 17.34846625766871, "grad_norm": 0.30068737268447876, "learning_rate": 2.3183238583276034e-06, "loss": 0.6800721883773804, "step": 14139 }, { "epoch": 17.349693251533743, "grad_norm": 0.2707381546497345, "learning_rate": 2.3162134826081594e-06, "loss": 0.5599082708358765, "step": 14140 }, { "epoch": 17.350920245398772, "grad_norm": 0.24687333405017853, "learning_rate": 2.3141040212074444e-06, "loss": 0.4379613399505615, "step": 14141 }, { "epoch": 17.352147239263804, "grad_norm": 0.25076746940612793, "learning_rate": 2.311995474210496e-06, "loss": 0.5802950859069824, "step": 14142 }, { "epoch": 17.353374233128836, "grad_norm": 0.2799932360649109, "learning_rate": 2.3098878417023016e-06, "loss": 0.6274938583374023, "step": 14143 }, { "epoch": 17.354601226993864, "grad_norm": 0.27728739380836487, "learning_rate": 2.307781123767816e-06, "loss": 0.74831223487854, "step": 14144 }, { "epoch": 17.355828220858896, "grad_norm": 0.2960984408855438, "learning_rate": 2.305675320491957e-06, "loss": 0.7403827905654907, "step": 14145 }, { "epoch": 17.357055214723925, "grad_norm": 0.26678964495658875, "learning_rate": 2.3035704319595925e-06, "loss": 0.5400561094284058, "step": 14146 }, { "epoch": 17.358282208588957, "grad_norm": 0.2760268449783325, "learning_rate": 2.3014664582555744e-06, "loss": 0.5884984731674194, "step": 14147 }, { "epoch": 17.35950920245399, "grad_norm": 0.28279808163642883, "learning_rate": 2.2993633994647046e-06, "loss": 0.5692579746246338, "step": 14148 }, { "epoch": 17.360736196319017, "grad_norm": 0.2624823749065399, "learning_rate": 2.297261255671751e-06, "loss": 0.5028597712516785, "step": 14149 }, { "epoch": 17.36196319018405, "grad_norm": 0.24172267317771912, "learning_rate": 2.2951600269614515e-06, "loss": 0.5682690739631653, "step": 14150 }, { "epoch": 17.36319018404908, "grad_norm": 0.2729721963405609, "learning_rate": 2.2930597134184885e-06, "loss": 0.6415613889694214, "step": 14151 }, { "epoch": 17.36441717791411, "grad_norm": 0.22844068706035614, "learning_rate": 2.290960315127527e-06, "loss": 0.5353841781616211, "step": 14152 }, { "epoch": 17.36564417177914, "grad_norm": 0.2604120969772339, "learning_rate": 2.2888618321731893e-06, "loss": 0.5927505493164062, "step": 14153 }, { "epoch": 17.36687116564417, "grad_norm": 0.2523173391819, "learning_rate": 2.286764264640054e-06, "loss": 0.6099991798400879, "step": 14154 }, { "epoch": 17.368098159509202, "grad_norm": 0.24564994871616364, "learning_rate": 2.2846676126126786e-06, "loss": 0.6265543699264526, "step": 14155 }, { "epoch": 17.369325153374234, "grad_norm": 0.24841798841953278, "learning_rate": 2.28257187617556e-06, "loss": 0.5725528001785278, "step": 14156 }, { "epoch": 17.370552147239263, "grad_norm": 0.2960120141506195, "learning_rate": 2.2804770554131686e-06, "loss": 0.6511860489845276, "step": 14157 }, { "epoch": 17.371779141104295, "grad_norm": 0.30998384952545166, "learning_rate": 2.2783831504099573e-06, "loss": 0.7672066688537598, "step": 14158 }, { "epoch": 17.373006134969327, "grad_norm": 0.270404189825058, "learning_rate": 2.276290161250322e-06, "loss": 0.5004845857620239, "step": 14159 }, { "epoch": 17.374233128834355, "grad_norm": 0.25077009201049805, "learning_rate": 2.2741980880186144e-06, "loss": 0.5321340560913086, "step": 14160 }, { "epoch": 17.375460122699387, "grad_norm": 0.26297950744628906, "learning_rate": 2.2721069307991673e-06, "loss": 0.6179609298706055, "step": 14161 }, { "epoch": 17.376687116564415, "grad_norm": 0.27349260449409485, "learning_rate": 2.2700166896762658e-06, "loss": 0.6432273387908936, "step": 14162 }, { "epoch": 17.377914110429447, "grad_norm": 0.2726494371891022, "learning_rate": 2.2679273647341655e-06, "loss": 0.5934302806854248, "step": 14163 }, { "epoch": 17.37914110429448, "grad_norm": 0.30938246846199036, "learning_rate": 2.2658389560570814e-06, "loss": 0.6731284856796265, "step": 14164 }, { "epoch": 17.380368098159508, "grad_norm": 0.2620421051979065, "learning_rate": 2.2637514637291857e-06, "loss": 0.6480493545532227, "step": 14165 }, { "epoch": 17.38159509202454, "grad_norm": 0.261222243309021, "learning_rate": 2.261664887834625e-06, "loss": 0.6007401943206787, "step": 14166 }, { "epoch": 17.382822085889572, "grad_norm": 0.3132525384426117, "learning_rate": 2.259579228457498e-06, "loss": 0.6343381404876709, "step": 14167 }, { "epoch": 17.3840490797546, "grad_norm": 0.22833102941513062, "learning_rate": 2.257494485681874e-06, "loss": 0.4365282952785492, "step": 14168 }, { "epoch": 17.385276073619632, "grad_norm": 0.28205689787864685, "learning_rate": 2.2554106595917917e-06, "loss": 0.7494641542434692, "step": 14169 }, { "epoch": 17.38650306748466, "grad_norm": 0.26757168769836426, "learning_rate": 2.253327750271228e-06, "loss": 0.5830202102661133, "step": 14170 }, { "epoch": 17.387730061349693, "grad_norm": 0.25152432918548584, "learning_rate": 2.251245757804149e-06, "loss": 0.594530463218689, "step": 14171 }, { "epoch": 17.388957055214725, "grad_norm": 0.31176555156707764, "learning_rate": 2.2491646822744713e-06, "loss": 0.8768845796585083, "step": 14172 }, { "epoch": 17.390184049079753, "grad_norm": 0.25852349400520325, "learning_rate": 2.247084523766077e-06, "loss": 0.7667372226715088, "step": 14173 }, { "epoch": 17.391411042944785, "grad_norm": 0.3090110123157501, "learning_rate": 2.2450052823628194e-06, "loss": 0.8316847681999207, "step": 14174 }, { "epoch": 17.392638036809817, "grad_norm": 0.28689199686050415, "learning_rate": 2.2429269581484945e-06, "loss": 0.6188992261886597, "step": 14175 }, { "epoch": 17.393865030674846, "grad_norm": 0.2762552797794342, "learning_rate": 2.2408495512068777e-06, "loss": 0.7352746725082397, "step": 14176 }, { "epoch": 17.395092024539878, "grad_norm": 0.25675174593925476, "learning_rate": 2.2387730616217045e-06, "loss": 0.7203073501586914, "step": 14177 }, { "epoch": 17.39631901840491, "grad_norm": 0.35934901237487793, "learning_rate": 2.236697489476672e-06, "loss": 0.5321892499923706, "step": 14178 }, { "epoch": 17.397546012269938, "grad_norm": 0.31514859199523926, "learning_rate": 2.2346228348554404e-06, "loss": 0.5799320936203003, "step": 14179 }, { "epoch": 17.39877300613497, "grad_norm": 0.2710852324962616, "learning_rate": 2.2325490978416326e-06, "loss": 0.6133052110671997, "step": 14180 }, { "epoch": 17.4, "grad_norm": 0.2693232595920563, "learning_rate": 2.2304762785188392e-06, "loss": 0.4722481369972229, "step": 14181 }, { "epoch": 17.40122699386503, "grad_norm": 0.2562851011753082, "learning_rate": 2.2284043769706027e-06, "loss": 0.5111571550369263, "step": 14182 }, { "epoch": 17.402453987730063, "grad_norm": 0.2737191915512085, "learning_rate": 2.226333393280447e-06, "loss": 0.5380007028579712, "step": 14183 }, { "epoch": 17.40368098159509, "grad_norm": 0.2589958906173706, "learning_rate": 2.224263327531831e-06, "loss": 0.6060973405838013, "step": 14184 }, { "epoch": 17.404907975460123, "grad_norm": 0.2994796335697174, "learning_rate": 2.222194179808204e-06, "loss": 0.7582780122756958, "step": 14185 }, { "epoch": 17.406134969325155, "grad_norm": 0.2749730944633484, "learning_rate": 2.220125950192967e-06, "loss": 0.6093894839286804, "step": 14186 }, { "epoch": 17.407361963190183, "grad_norm": 0.26866111159324646, "learning_rate": 2.2180586387694795e-06, "loss": 0.41364383697509766, "step": 14187 }, { "epoch": 17.408588957055215, "grad_norm": 0.26681405305862427, "learning_rate": 2.215992245621079e-06, "loss": 0.5014191269874573, "step": 14188 }, { "epoch": 17.409815950920244, "grad_norm": 0.2762863039970398, "learning_rate": 2.2139267708310456e-06, "loss": 0.5099301338195801, "step": 14189 }, { "epoch": 17.411042944785276, "grad_norm": 0.2901003956794739, "learning_rate": 2.2118622144826325e-06, "loss": 0.527709424495697, "step": 14190 }, { "epoch": 17.412269938650308, "grad_norm": 0.31401327252388, "learning_rate": 2.209798576659061e-06, "loss": 0.6207336187362671, "step": 14191 }, { "epoch": 17.413496932515336, "grad_norm": 0.2412451058626175, "learning_rate": 2.2077358574435103e-06, "loss": 0.442482590675354, "step": 14192 }, { "epoch": 17.41472392638037, "grad_norm": 0.27121371030807495, "learning_rate": 2.2056740569191216e-06, "loss": 0.5201159119606018, "step": 14193 }, { "epoch": 17.4159509202454, "grad_norm": 0.2217635065317154, "learning_rate": 2.203613175169006e-06, "loss": 0.3135730028152466, "step": 14194 }, { "epoch": 17.41717791411043, "grad_norm": 0.30793827772140503, "learning_rate": 2.2015532122762195e-06, "loss": 0.566200852394104, "step": 14195 }, { "epoch": 17.41840490797546, "grad_norm": 0.2583177387714386, "learning_rate": 2.1994941683237986e-06, "loss": 0.668103814125061, "step": 14196 }, { "epoch": 17.41963190184049, "grad_norm": 0.21114033460617065, "learning_rate": 2.1974360433947405e-06, "loss": 0.4154367446899414, "step": 14197 }, { "epoch": 17.42085889570552, "grad_norm": 0.2619395852088928, "learning_rate": 2.1953788375720013e-06, "loss": 0.539107084274292, "step": 14198 }, { "epoch": 17.422085889570553, "grad_norm": 0.2520110607147217, "learning_rate": 2.1933225509385037e-06, "loss": 0.5301980376243591, "step": 14199 }, { "epoch": 17.42331288343558, "grad_norm": 0.24303697049617767, "learning_rate": 2.1912671835771227e-06, "loss": 0.5003727674484253, "step": 14200 }, { "epoch": 17.424539877300614, "grad_norm": 0.2752231955528259, "learning_rate": 2.189212735570703e-06, "loss": 0.4848424792289734, "step": 14201 }, { "epoch": 17.425766871165646, "grad_norm": 0.29398760199546814, "learning_rate": 2.187159207002068e-06, "loss": 0.5320878028869629, "step": 14202 }, { "epoch": 17.426993865030674, "grad_norm": 0.25394800305366516, "learning_rate": 2.1851065979539816e-06, "loss": 0.7638729810714722, "step": 14203 }, { "epoch": 17.428220858895706, "grad_norm": 0.2719709575176239, "learning_rate": 2.183054908509177e-06, "loss": 0.45516133308410645, "step": 14204 }, { "epoch": 17.429447852760735, "grad_norm": 0.25026533007621765, "learning_rate": 2.1810041387503504e-06, "loss": 0.547872006893158, "step": 14205 }, { "epoch": 17.430674846625767, "grad_norm": 0.27497759461402893, "learning_rate": 2.1789542887601655e-06, "loss": 0.6336352825164795, "step": 14206 }, { "epoch": 17.4319018404908, "grad_norm": 0.24747693538665771, "learning_rate": 2.176905358621248e-06, "loss": 0.6198071241378784, "step": 14207 }, { "epoch": 17.433128834355827, "grad_norm": 0.2317914068698883, "learning_rate": 2.1748573484161846e-06, "loss": 0.41667914390563965, "step": 14208 }, { "epoch": 17.43435582822086, "grad_norm": 0.2414952963590622, "learning_rate": 2.172810258227517e-06, "loss": 0.5336410999298096, "step": 14209 }, { "epoch": 17.43558282208589, "grad_norm": 0.2329815924167633, "learning_rate": 2.1707640881377638e-06, "loss": 0.4678785800933838, "step": 14210 }, { "epoch": 17.43680981595092, "grad_norm": 0.25768008828163147, "learning_rate": 2.1687188382294e-06, "loss": 0.5822867751121521, "step": 14211 }, { "epoch": 17.43803680981595, "grad_norm": 0.24382169544696808, "learning_rate": 2.166674508584862e-06, "loss": 0.4501253366470337, "step": 14212 }, { "epoch": 17.43926380368098, "grad_norm": 0.2574690878391266, "learning_rate": 2.1646310992865545e-06, "loss": 0.5397832989692688, "step": 14213 }, { "epoch": 17.440490797546012, "grad_norm": 0.2977140247821808, "learning_rate": 2.162588610416838e-06, "loss": 0.8335714340209961, "step": 14214 }, { "epoch": 17.441717791411044, "grad_norm": 0.2549656629562378, "learning_rate": 2.1605470420580367e-06, "loss": 0.6301568746566772, "step": 14215 }, { "epoch": 17.442944785276072, "grad_norm": 0.2758943438529968, "learning_rate": 2.1585063942924448e-06, "loss": 0.49957436323165894, "step": 14216 }, { "epoch": 17.444171779141104, "grad_norm": 0.26675698161125183, "learning_rate": 2.156466667202317e-06, "loss": 0.6860071420669556, "step": 14217 }, { "epoch": 17.445398773006136, "grad_norm": 0.28336361050605774, "learning_rate": 2.1544278608698676e-06, "loss": 0.7743538618087769, "step": 14218 }, { "epoch": 17.446625766871165, "grad_norm": 0.2510300278663635, "learning_rate": 2.15238997537727e-06, "loss": 0.5287899971008301, "step": 14219 }, { "epoch": 17.447852760736197, "grad_norm": 0.2613190710544586, "learning_rate": 2.1503530108066686e-06, "loss": 0.7045417428016663, "step": 14220 }, { "epoch": 17.449079754601225, "grad_norm": 0.302320271730423, "learning_rate": 2.1483169672401686e-06, "loss": 0.6648386716842651, "step": 14221 }, { "epoch": 17.450306748466257, "grad_norm": 0.24511896073818207, "learning_rate": 2.146281844759837e-06, "loss": 0.4408983290195465, "step": 14222 }, { "epoch": 17.45153374233129, "grad_norm": 0.26661714911460876, "learning_rate": 2.1442476434477025e-06, "loss": 0.5604907274246216, "step": 14223 }, { "epoch": 17.452760736196318, "grad_norm": 0.28718048334121704, "learning_rate": 2.142214363385761e-06, "loss": 0.47932296991348267, "step": 14224 }, { "epoch": 17.45398773006135, "grad_norm": 0.29416704177856445, "learning_rate": 2.1401820046559633e-06, "loss": 0.4842163324356079, "step": 14225 }, { "epoch": 17.45521472392638, "grad_norm": 0.3308861553668976, "learning_rate": 2.138150567340233e-06, "loss": 0.5036208033561707, "step": 14226 }, { "epoch": 17.45644171779141, "grad_norm": 0.2766614854335785, "learning_rate": 2.1361200515204515e-06, "loss": 0.7492249011993408, "step": 14227 }, { "epoch": 17.457668711656442, "grad_norm": 0.28058624267578125, "learning_rate": 2.134090457278459e-06, "loss": 0.5899215340614319, "step": 14228 }, { "epoch": 17.45889570552147, "grad_norm": 0.27944323420524597, "learning_rate": 2.1320617846960626e-06, "loss": 0.7315521240234375, "step": 14229 }, { "epoch": 17.460122699386503, "grad_norm": 0.2758376896381378, "learning_rate": 2.1300340338550358e-06, "loss": 0.7607177495956421, "step": 14230 }, { "epoch": 17.461349693251535, "grad_norm": 0.27118414640426636, "learning_rate": 2.128007204837107e-06, "loss": 0.6788593530654907, "step": 14231 }, { "epoch": 17.462576687116563, "grad_norm": 0.2620147466659546, "learning_rate": 2.1259812977239805e-06, "loss": 0.6251919269561768, "step": 14232 }, { "epoch": 17.463803680981595, "grad_norm": 0.27348196506500244, "learning_rate": 2.1239563125973054e-06, "loss": 0.7429472208023071, "step": 14233 }, { "epoch": 17.465030674846627, "grad_norm": 0.2692507207393646, "learning_rate": 2.121932249538708e-06, "loss": 0.49546271562576294, "step": 14234 }, { "epoch": 17.466257668711656, "grad_norm": 0.2759436070919037, "learning_rate": 2.1199091086297697e-06, "loss": 0.6910759806632996, "step": 14235 }, { "epoch": 17.467484662576688, "grad_norm": 0.3031865656375885, "learning_rate": 2.1178868899520395e-06, "loss": 0.5823335647583008, "step": 14236 }, { "epoch": 17.46871165644172, "grad_norm": 0.2707490921020508, "learning_rate": 2.1158655935870323e-06, "loss": 0.5671652555465698, "step": 14237 }, { "epoch": 17.469938650306748, "grad_norm": 0.26306018233299255, "learning_rate": 2.113845219616209e-06, "loss": 0.5472205281257629, "step": 14238 }, { "epoch": 17.47116564417178, "grad_norm": 0.2238406240940094, "learning_rate": 2.1118257681210145e-06, "loss": 0.4495799243450165, "step": 14239 }, { "epoch": 17.47239263803681, "grad_norm": 0.23403900861740112, "learning_rate": 2.10980723918284e-06, "loss": 0.5389083623886108, "step": 14240 }, { "epoch": 17.47361963190184, "grad_norm": 0.3092230260372162, "learning_rate": 2.107789632883053e-06, "loss": 0.515608549118042, "step": 14241 }, { "epoch": 17.474846625766872, "grad_norm": 0.2831712067127228, "learning_rate": 2.105772949302981e-06, "loss": 0.6425436735153198, "step": 14242 }, { "epoch": 17.4760736196319, "grad_norm": 0.26415395736694336, "learning_rate": 2.1037571885239e-06, "loss": 0.5610581636428833, "step": 14243 }, { "epoch": 17.477300613496933, "grad_norm": 0.23610694706439972, "learning_rate": 2.1017423506270647e-06, "loss": 0.4166916012763977, "step": 14244 }, { "epoch": 17.478527607361965, "grad_norm": 0.27138617634773254, "learning_rate": 2.0997284356936818e-06, "loss": 0.5039281845092773, "step": 14245 }, { "epoch": 17.479754601226993, "grad_norm": 0.31349849700927734, "learning_rate": 2.09771544380494e-06, "loss": 0.7553391456604004, "step": 14246 }, { "epoch": 17.480981595092025, "grad_norm": 0.27916693687438965, "learning_rate": 2.0957033750419685e-06, "loss": 0.5558796525001526, "step": 14247 }, { "epoch": 17.482208588957054, "grad_norm": 0.23819679021835327, "learning_rate": 2.093692229485866e-06, "loss": 0.4379676878452301, "step": 14248 }, { "epoch": 17.483435582822086, "grad_norm": 0.31998586654663086, "learning_rate": 2.091682007217699e-06, "loss": 0.6871703267097473, "step": 14249 }, { "epoch": 17.484662576687118, "grad_norm": 0.25060275197029114, "learning_rate": 2.0896727083184935e-06, "loss": 0.647017240524292, "step": 14250 }, { "epoch": 17.485889570552146, "grad_norm": 0.23023341596126556, "learning_rate": 2.0876643328692436e-06, "loss": 0.4409196376800537, "step": 14251 }, { "epoch": 17.487116564417178, "grad_norm": 0.23021215200424194, "learning_rate": 2.0856568809508924e-06, "loss": 0.34741830825805664, "step": 14252 }, { "epoch": 17.48834355828221, "grad_norm": 0.2691667675971985, "learning_rate": 2.083650352644359e-06, "loss": 0.6022841334342957, "step": 14253 }, { "epoch": 17.48957055214724, "grad_norm": 0.2483062595129013, "learning_rate": 2.08164474803052e-06, "loss": 0.6737663745880127, "step": 14254 }, { "epoch": 17.49079754601227, "grad_norm": 0.2802790105342865, "learning_rate": 2.0796400671902166e-06, "loss": 0.6129090785980225, "step": 14255 }, { "epoch": 17.4920245398773, "grad_norm": 0.261491984128952, "learning_rate": 2.077636310204256e-06, "loss": 0.5800438523292542, "step": 14256 }, { "epoch": 17.49325153374233, "grad_norm": 0.24353653192520142, "learning_rate": 2.0756334771533937e-06, "loss": 0.5311113595962524, "step": 14257 }, { "epoch": 17.494478527607363, "grad_norm": 0.2802468240261078, "learning_rate": 2.0736315681183615e-06, "loss": 0.7455933690071106, "step": 14258 }, { "epoch": 17.49570552147239, "grad_norm": 0.2594517469406128, "learning_rate": 2.0716305831798567e-06, "loss": 0.620958149433136, "step": 14259 }, { "epoch": 17.496932515337424, "grad_norm": 0.25415024161338806, "learning_rate": 2.069630522418528e-06, "loss": 0.5817791819572449, "step": 14260 }, { "epoch": 17.498159509202456, "grad_norm": 0.2822800874710083, "learning_rate": 2.0676313859149977e-06, "loss": 0.7804981470108032, "step": 14261 }, { "epoch": 17.499386503067484, "grad_norm": 0.2514466941356659, "learning_rate": 2.065633173749834e-06, "loss": 0.6778548955917358, "step": 14262 }, { "epoch": 17.500613496932516, "grad_norm": 0.3075786530971527, "learning_rate": 2.0636358860035897e-06, "loss": 0.6851797699928284, "step": 14263 }, { "epoch": 17.501840490797544, "grad_norm": 0.26380228996276855, "learning_rate": 2.061639522756764e-06, "loss": 0.5608134269714355, "step": 14264 }, { "epoch": 17.503067484662576, "grad_norm": 0.2713947594165802, "learning_rate": 2.059644084089829e-06, "loss": 0.5237977504730225, "step": 14265 }, { "epoch": 17.50429447852761, "grad_norm": 0.2681719660758972, "learning_rate": 2.057649570083217e-06, "loss": 0.6444656848907471, "step": 14266 }, { "epoch": 17.505521472392637, "grad_norm": 0.26825007796287537, "learning_rate": 2.0556559808173115e-06, "loss": 0.6726874113082886, "step": 14267 }, { "epoch": 17.50674846625767, "grad_norm": 0.26438552141189575, "learning_rate": 2.05366331637247e-06, "loss": 0.658279299736023, "step": 14268 }, { "epoch": 17.5079754601227, "grad_norm": 0.27858874201774597, "learning_rate": 2.05167157682902e-06, "loss": 0.47782525420188904, "step": 14269 }, { "epoch": 17.50920245398773, "grad_norm": 0.24201306700706482, "learning_rate": 2.049680762267242e-06, "loss": 0.3200555443763733, "step": 14270 }, { "epoch": 17.51042944785276, "grad_norm": 0.24775777757167816, "learning_rate": 2.0476908727673744e-06, "loss": 0.49750006198883057, "step": 14271 }, { "epoch": 17.51165644171779, "grad_norm": 0.272154837846756, "learning_rate": 2.0457019084096225e-06, "loss": 0.6853910088539124, "step": 14272 }, { "epoch": 17.512883435582822, "grad_norm": 0.2651567757129669, "learning_rate": 2.0437138692741607e-06, "loss": 0.6586159467697144, "step": 14273 }, { "epoch": 17.514110429447854, "grad_norm": 0.26933276653289795, "learning_rate": 2.0417267554411174e-06, "loss": 0.7710517048835754, "step": 14274 }, { "epoch": 17.515337423312882, "grad_norm": 0.25067222118377686, "learning_rate": 2.0397405669905946e-06, "loss": 0.5187223553657532, "step": 14275 }, { "epoch": 17.516564417177914, "grad_norm": 0.23474057018756866, "learning_rate": 2.037755304002642e-06, "loss": 0.5166373252868652, "step": 14276 }, { "epoch": 17.517791411042946, "grad_norm": 0.25840112566947937, "learning_rate": 2.035770966557282e-06, "loss": 0.6586380004882812, "step": 14277 }, { "epoch": 17.519018404907975, "grad_norm": 0.25220200419425964, "learning_rate": 2.033787554734498e-06, "loss": 0.544966459274292, "step": 14278 }, { "epoch": 17.520245398773007, "grad_norm": 0.2596554756164551, "learning_rate": 2.031805068614237e-06, "loss": 0.445959210395813, "step": 14279 }, { "epoch": 17.521472392638035, "grad_norm": 0.2985249161720276, "learning_rate": 2.02982350827641e-06, "loss": 0.557265043258667, "step": 14280 }, { "epoch": 17.522699386503067, "grad_norm": 0.3244400918483734, "learning_rate": 2.027842873800878e-06, "loss": 0.47563934326171875, "step": 14281 }, { "epoch": 17.5239263803681, "grad_norm": 0.2739798128604889, "learning_rate": 2.025863165267483e-06, "loss": 0.619111180305481, "step": 14282 }, { "epoch": 17.525153374233128, "grad_norm": 0.27822795510292053, "learning_rate": 2.0238843827560194e-06, "loss": 0.759363055229187, "step": 14283 }, { "epoch": 17.52638036809816, "grad_norm": 0.26619216799736023, "learning_rate": 2.0219065263462483e-06, "loss": 0.5187146663665771, "step": 14284 }, { "epoch": 17.52760736196319, "grad_norm": 0.279365599155426, "learning_rate": 2.0199295961178893e-06, "loss": 0.5975996851921082, "step": 14285 }, { "epoch": 17.52883435582822, "grad_norm": 0.2853270173072815, "learning_rate": 2.017953592150626e-06, "loss": 0.6122384071350098, "step": 14286 }, { "epoch": 17.530061349693252, "grad_norm": 0.24315033853054047, "learning_rate": 2.0159785145241055e-06, "loss": 0.6426345705986023, "step": 14287 }, { "epoch": 17.53128834355828, "grad_norm": 0.2803899943828583, "learning_rate": 2.0140043633179397e-06, "loss": 0.6918233036994934, "step": 14288 }, { "epoch": 17.532515337423312, "grad_norm": 0.27817296981811523, "learning_rate": 2.0120311386117e-06, "loss": 0.5922278165817261, "step": 14289 }, { "epoch": 17.533742331288344, "grad_norm": 0.30836641788482666, "learning_rate": 2.0100588404849215e-06, "loss": 0.5911199450492859, "step": 14290 }, { "epoch": 17.534969325153373, "grad_norm": 0.28655049204826355, "learning_rate": 2.0080874690171004e-06, "loss": 0.7350547313690186, "step": 14291 }, { "epoch": 17.536196319018405, "grad_norm": 0.2480843961238861, "learning_rate": 2.0061170242876986e-06, "loss": 0.5186377763748169, "step": 14292 }, { "epoch": 17.537423312883437, "grad_norm": 0.28158125281333923, "learning_rate": 2.0041475063761387e-06, "loss": 0.7089517712593079, "step": 14293 }, { "epoch": 17.538650306748465, "grad_norm": 0.27978020906448364, "learning_rate": 2.002178915361813e-06, "loss": 0.7242726683616638, "step": 14294 }, { "epoch": 17.539877300613497, "grad_norm": 0.25529929995536804, "learning_rate": 2.000211251324058e-06, "loss": 0.6170402765274048, "step": 14295 }, { "epoch": 17.54110429447853, "grad_norm": 0.22586673498153687, "learning_rate": 1.99824451434219e-06, "loss": 0.6639702320098877, "step": 14296 }, { "epoch": 17.542331288343558, "grad_norm": 0.31078749895095825, "learning_rate": 1.99627870449548e-06, "loss": 0.5831753611564636, "step": 14297 }, { "epoch": 17.54355828220859, "grad_norm": 0.27411970496177673, "learning_rate": 1.9943138218631695e-06, "loss": 0.5859456062316895, "step": 14298 }, { "epoch": 17.54478527607362, "grad_norm": 0.28566139936447144, "learning_rate": 1.992349866524454e-06, "loss": 0.7874006032943726, "step": 14299 }, { "epoch": 17.54601226993865, "grad_norm": 0.30353671312332153, "learning_rate": 1.9903868385585e-06, "loss": 0.638933002948761, "step": 14300 }, { "epoch": 17.547239263803682, "grad_norm": 0.24988165497779846, "learning_rate": 1.9884247380444227e-06, "loss": 0.6052491664886475, "step": 14301 }, { "epoch": 17.54846625766871, "grad_norm": 0.2448102831840515, "learning_rate": 1.986463565061314e-06, "loss": 0.6693822145462036, "step": 14302 }, { "epoch": 17.549693251533743, "grad_norm": 0.24871738255023956, "learning_rate": 1.984503319688222e-06, "loss": 0.5773844122886658, "step": 14303 }, { "epoch": 17.550920245398775, "grad_norm": 0.25198274850845337, "learning_rate": 1.982544002004158e-06, "loss": 0.6165083646774292, "step": 14304 }, { "epoch": 17.552147239263803, "grad_norm": 0.26620492339134216, "learning_rate": 1.9805856120881012e-06, "loss": 0.7330532073974609, "step": 14305 }, { "epoch": 17.553374233128835, "grad_norm": 0.27659356594085693, "learning_rate": 1.9786281500189823e-06, "loss": 0.6469894647598267, "step": 14306 }, { "epoch": 17.554601226993864, "grad_norm": 0.27305933833122253, "learning_rate": 1.9766716158757026e-06, "loss": 0.7543803453445435, "step": 14307 }, { "epoch": 17.555828220858896, "grad_norm": 0.24711422622203827, "learning_rate": 1.9747160097371265e-06, "loss": 0.6871770024299622, "step": 14308 }, { "epoch": 17.557055214723928, "grad_norm": 0.2763868272304535, "learning_rate": 1.972761331682077e-06, "loss": 0.7413321733474731, "step": 14309 }, { "epoch": 17.558282208588956, "grad_norm": 0.27327781915664673, "learning_rate": 1.9708075817893467e-06, "loss": 0.5294076800346375, "step": 14310 }, { "epoch": 17.559509202453988, "grad_norm": 0.27830711007118225, "learning_rate": 1.9688547601376756e-06, "loss": 0.5747636556625366, "step": 14311 }, { "epoch": 17.56073619631902, "grad_norm": 0.28333422541618347, "learning_rate": 1.966902866805781e-06, "loss": 0.847516655921936, "step": 14312 }, { "epoch": 17.56196319018405, "grad_norm": 0.23520560562610626, "learning_rate": 1.9649519018723416e-06, "loss": 0.522774338722229, "step": 14313 }, { "epoch": 17.56319018404908, "grad_norm": 0.23300829529762268, "learning_rate": 1.9630018654159978e-06, "loss": 0.5573487877845764, "step": 14314 }, { "epoch": 17.56441717791411, "grad_norm": 0.2769521176815033, "learning_rate": 1.9610527575153414e-06, "loss": 0.4203752279281616, "step": 14315 }, { "epoch": 17.56564417177914, "grad_norm": 0.253371000289917, "learning_rate": 1.9591045782489408e-06, "loss": 0.6724898219108582, "step": 14316 }, { "epoch": 17.566871165644173, "grad_norm": 0.2820114195346832, "learning_rate": 1.9571573276953166e-06, "loss": 0.7318178415298462, "step": 14317 }, { "epoch": 17.5680981595092, "grad_norm": 0.26751047372817993, "learning_rate": 1.9552110059329635e-06, "loss": 0.7239772081375122, "step": 14318 }, { "epoch": 17.569325153374233, "grad_norm": 0.2517808973789215, "learning_rate": 1.953265613040331e-06, "loss": 0.5425969362258911, "step": 14319 }, { "epoch": 17.570552147239265, "grad_norm": 0.25311070680618286, "learning_rate": 1.951321149095825e-06, "loss": 0.5863925218582153, "step": 14320 }, { "epoch": 17.571779141104294, "grad_norm": 0.2652263939380646, "learning_rate": 1.94937761417783e-06, "loss": 0.5367114543914795, "step": 14321 }, { "epoch": 17.573006134969326, "grad_norm": 0.2546062767505646, "learning_rate": 1.9474350083646787e-06, "loss": 0.4637141227722168, "step": 14322 }, { "epoch": 17.574233128834354, "grad_norm": 0.2761853337287903, "learning_rate": 1.945493331734674e-06, "loss": 0.5319190621376038, "step": 14323 }, { "epoch": 17.575460122699386, "grad_norm": 0.30398523807525635, "learning_rate": 1.9435525843660845e-06, "loss": 0.763881266117096, "step": 14324 }, { "epoch": 17.57668711656442, "grad_norm": 0.2561267912387848, "learning_rate": 1.941612766337128e-06, "loss": 0.5196483135223389, "step": 14325 }, { "epoch": 17.577914110429447, "grad_norm": 0.2637802064418793, "learning_rate": 1.9396738777259947e-06, "loss": 0.6601879596710205, "step": 14326 }, { "epoch": 17.57914110429448, "grad_norm": 0.31749945878982544, "learning_rate": 1.9377359186108392e-06, "loss": 0.4232320487499237, "step": 14327 }, { "epoch": 17.58036809815951, "grad_norm": 0.2643166184425354, "learning_rate": 1.9357988890697704e-06, "loss": 0.6582028865814209, "step": 14328 }, { "epoch": 17.58159509202454, "grad_norm": 0.27360913157463074, "learning_rate": 1.9338627891808735e-06, "loss": 0.5488544702529907, "step": 14329 }, { "epoch": 17.58282208588957, "grad_norm": 0.3105839192867279, "learning_rate": 1.931927619022178e-06, "loss": 0.42581653594970703, "step": 14330 }, { "epoch": 17.5840490797546, "grad_norm": 0.2652958929538727, "learning_rate": 1.929993378671685e-06, "loss": 0.5742343664169312, "step": 14331 }, { "epoch": 17.58527607361963, "grad_norm": 0.2848281264305115, "learning_rate": 1.9280600682073628e-06, "loss": 0.7393077611923218, "step": 14332 }, { "epoch": 17.586503067484664, "grad_norm": 0.2627299726009369, "learning_rate": 1.9261276877071355e-06, "loss": 0.563457727432251, "step": 14333 }, { "epoch": 17.587730061349692, "grad_norm": 0.2593686580657959, "learning_rate": 1.924196237248893e-06, "loss": 0.4463978707790375, "step": 14334 }, { "epoch": 17.588957055214724, "grad_norm": 0.2727314531803131, "learning_rate": 1.9222657169104845e-06, "loss": 0.6641530990600586, "step": 14335 }, { "epoch": 17.590184049079756, "grad_norm": 0.2590330243110657, "learning_rate": 1.9203361267697256e-06, "loss": 0.4375689923763275, "step": 14336 }, { "epoch": 17.591411042944785, "grad_norm": 0.26972952485084534, "learning_rate": 1.9184074669043926e-06, "loss": 0.6592639684677124, "step": 14337 }, { "epoch": 17.592638036809817, "grad_norm": 0.30288273096084595, "learning_rate": 1.9164797373922295e-06, "loss": 0.5119094848632812, "step": 14338 }, { "epoch": 17.593865030674845, "grad_norm": 0.2728808522224426, "learning_rate": 1.9145529383109263e-06, "loss": 0.5867478847503662, "step": 14339 }, { "epoch": 17.595092024539877, "grad_norm": 0.2817700207233429, "learning_rate": 1.9126270697381515e-06, "loss": 0.520989179611206, "step": 14340 }, { "epoch": 17.59631901840491, "grad_norm": 0.2752704620361328, "learning_rate": 1.910702131751532e-06, "loss": 0.593854546546936, "step": 14341 }, { "epoch": 17.597546012269937, "grad_norm": 0.30511847138404846, "learning_rate": 1.908778124428659e-06, "loss": 0.6564013957977295, "step": 14342 }, { "epoch": 17.59877300613497, "grad_norm": 0.2985595762729645, "learning_rate": 1.9068550478470838e-06, "loss": 0.5889887809753418, "step": 14343 }, { "epoch": 17.6, "grad_norm": 0.2585541307926178, "learning_rate": 1.9049329020843165e-06, "loss": 0.4911317825317383, "step": 14344 }, { "epoch": 17.60122699386503, "grad_norm": 0.2810204327106476, "learning_rate": 1.9030116872178316e-06, "loss": 0.6339596509933472, "step": 14345 }, { "epoch": 17.602453987730062, "grad_norm": 0.2830090820789337, "learning_rate": 1.9010914033250725e-06, "loss": 0.6415534019470215, "step": 14346 }, { "epoch": 17.60368098159509, "grad_norm": 0.3149171471595764, "learning_rate": 1.8991720504834388e-06, "loss": 0.7063670754432678, "step": 14347 }, { "epoch": 17.604907975460122, "grad_norm": 0.2591556906700134, "learning_rate": 1.8972536287702985e-06, "loss": 0.7062078714370728, "step": 14348 }, { "epoch": 17.606134969325154, "grad_norm": 0.2258691042661667, "learning_rate": 1.895336138262968e-06, "loss": 0.5762752890586853, "step": 14349 }, { "epoch": 17.607361963190183, "grad_norm": 0.25279170274734497, "learning_rate": 1.893419579038741e-06, "loss": 0.6081328392028809, "step": 14350 }, { "epoch": 17.608588957055215, "grad_norm": 0.2571757733821869, "learning_rate": 1.8915039511748694e-06, "loss": 0.5621274709701538, "step": 14351 }, { "epoch": 17.609815950920247, "grad_norm": 0.28867974877357483, "learning_rate": 1.8895892547485667e-06, "loss": 0.6081627011299133, "step": 14352 }, { "epoch": 17.611042944785275, "grad_norm": 0.2766903340816498, "learning_rate": 1.8876754898370124e-06, "loss": 0.5518431663513184, "step": 14353 }, { "epoch": 17.612269938650307, "grad_norm": 0.2876291573047638, "learning_rate": 1.8857626565173369e-06, "loss": 0.7846298217773438, "step": 14354 }, { "epoch": 17.61349693251534, "grad_norm": 0.26582902669906616, "learning_rate": 1.8838507548666423e-06, "loss": 0.6938156485557556, "step": 14355 }, { "epoch": 17.614723926380368, "grad_norm": 0.28136882185935974, "learning_rate": 1.8819397849619974e-06, "loss": 0.5561915040016174, "step": 14356 }, { "epoch": 17.6159509202454, "grad_norm": 0.2797040045261383, "learning_rate": 1.880029746880424e-06, "loss": 0.5614254474639893, "step": 14357 }, { "epoch": 17.617177914110428, "grad_norm": 0.2230634242296219, "learning_rate": 1.8781206406989104e-06, "loss": 0.4600488543510437, "step": 14358 }, { "epoch": 17.61840490797546, "grad_norm": 0.310924232006073, "learning_rate": 1.8762124664944091e-06, "loss": 0.6626037359237671, "step": 14359 }, { "epoch": 17.619631901840492, "grad_norm": 0.28635767102241516, "learning_rate": 1.8743052243438309e-06, "loss": 0.5710185766220093, "step": 14360 }, { "epoch": 17.62085889570552, "grad_norm": 0.25864267349243164, "learning_rate": 1.8723989143240555e-06, "loss": 0.38755494356155396, "step": 14361 }, { "epoch": 17.622085889570553, "grad_norm": 0.3102854788303375, "learning_rate": 1.8704935365119192e-06, "loss": 0.5602004528045654, "step": 14362 }, { "epoch": 17.62331288343558, "grad_norm": 0.2700243592262268, "learning_rate": 1.8685890909842186e-06, "loss": 0.6629019379615784, "step": 14363 }, { "epoch": 17.624539877300613, "grad_norm": 0.2541615962982178, "learning_rate": 1.8666855778177173e-06, "loss": 0.42377787828445435, "step": 14364 }, { "epoch": 17.625766871165645, "grad_norm": 0.3047042787075043, "learning_rate": 1.8647829970891428e-06, "loss": 0.698991060256958, "step": 14365 }, { "epoch": 17.626993865030673, "grad_norm": 0.26980042457580566, "learning_rate": 1.8628813488751812e-06, "loss": 0.5497736930847168, "step": 14366 }, { "epoch": 17.628220858895705, "grad_norm": 0.3115152418613434, "learning_rate": 1.8609806332524876e-06, "loss": 0.5967810153961182, "step": 14367 }, { "epoch": 17.629447852760737, "grad_norm": 0.2635899484157562, "learning_rate": 1.8590808502976676e-06, "loss": 0.5337077379226685, "step": 14368 }, { "epoch": 17.630674846625766, "grad_norm": 0.27072587609291077, "learning_rate": 1.8571820000872958e-06, "loss": 0.5310599207878113, "step": 14369 }, { "epoch": 17.631901840490798, "grad_norm": 0.29539018869400024, "learning_rate": 1.855284082697914e-06, "loss": 0.7454757690429688, "step": 14370 }, { "epoch": 17.63312883435583, "grad_norm": 0.2702448070049286, "learning_rate": 1.853387098206022e-06, "loss": 0.6631537079811096, "step": 14371 }, { "epoch": 17.63435582822086, "grad_norm": 0.2749934196472168, "learning_rate": 1.8514910466880808e-06, "loss": 0.609088659286499, "step": 14372 }, { "epoch": 17.63558282208589, "grad_norm": 0.28569626808166504, "learning_rate": 1.8495959282205099e-06, "loss": 0.8028026819229126, "step": 14373 }, { "epoch": 17.63680981595092, "grad_norm": 0.24699977040290833, "learning_rate": 1.8477017428797012e-06, "loss": 0.3878748118877411, "step": 14374 }, { "epoch": 17.63803680981595, "grad_norm": 0.26555773615837097, "learning_rate": 1.8458084907420043e-06, "loss": 0.528960108757019, "step": 14375 }, { "epoch": 17.639263803680983, "grad_norm": 0.2693513035774231, "learning_rate": 1.8439161718837278e-06, "loss": 0.5103534460067749, "step": 14376 }, { "epoch": 17.64049079754601, "grad_norm": 0.3010101318359375, "learning_rate": 1.8420247863811496e-06, "loss": 0.4201180934906006, "step": 14377 }, { "epoch": 17.641717791411043, "grad_norm": 0.23284177482128143, "learning_rate": 1.8401343343105032e-06, "loss": 0.5252590179443359, "step": 14378 }, { "epoch": 17.642944785276075, "grad_norm": 0.2809838652610779, "learning_rate": 1.8382448157479804e-06, "loss": 0.7658406496047974, "step": 14379 }, { "epoch": 17.644171779141104, "grad_norm": 0.2855856716632843, "learning_rate": 1.8363562307697562e-06, "loss": 0.7377073764801025, "step": 14380 }, { "epoch": 17.645398773006136, "grad_norm": 0.2783436179161072, "learning_rate": 1.8344685794519505e-06, "loss": 0.7167338132858276, "step": 14381 }, { "epoch": 17.646625766871164, "grad_norm": 0.29877394437789917, "learning_rate": 1.8325818618706414e-06, "loss": 0.5670073628425598, "step": 14382 }, { "epoch": 17.647852760736196, "grad_norm": 0.2720325291156769, "learning_rate": 1.8306960781018816e-06, "loss": 0.5749816298484802, "step": 14383 }, { "epoch": 17.649079754601228, "grad_norm": 0.25074464082717896, "learning_rate": 1.828811228221683e-06, "loss": 0.42687004804611206, "step": 14384 }, { "epoch": 17.650306748466257, "grad_norm": 0.29345938563346863, "learning_rate": 1.8269273123060178e-06, "loss": 0.3845694959163666, "step": 14385 }, { "epoch": 17.65153374233129, "grad_norm": 0.2550070881843567, "learning_rate": 1.825044330430825e-06, "loss": 0.6089211702346802, "step": 14386 }, { "epoch": 17.65276073619632, "grad_norm": 0.2555377781391144, "learning_rate": 1.8231622826719946e-06, "loss": 0.49320292472839355, "step": 14387 }, { "epoch": 17.65398773006135, "grad_norm": 0.27060437202453613, "learning_rate": 1.8212811691053876e-06, "loss": 0.49807077646255493, "step": 14388 }, { "epoch": 17.65521472392638, "grad_norm": 0.25798535346984863, "learning_rate": 1.8194009898068325e-06, "loss": 0.524013340473175, "step": 14389 }, { "epoch": 17.65644171779141, "grad_norm": 0.2725028097629547, "learning_rate": 1.8175217448521076e-06, "loss": 0.5230470299720764, "step": 14390 }, { "epoch": 17.65766871165644, "grad_norm": 0.2943117320537567, "learning_rate": 1.8156434343169687e-06, "loss": 0.5959920883178711, "step": 14391 }, { "epoch": 17.658895705521473, "grad_norm": 0.25196322798728943, "learning_rate": 1.8137660582771138e-06, "loss": 0.5285436511039734, "step": 14392 }, { "epoch": 17.660122699386502, "grad_norm": 0.2835349142551422, "learning_rate": 1.811889616808221e-06, "loss": 0.41854867339134216, "step": 14393 }, { "epoch": 17.661349693251534, "grad_norm": 0.23203280568122864, "learning_rate": 1.8100141099859219e-06, "loss": 0.48797735571861267, "step": 14394 }, { "epoch": 17.662576687116566, "grad_norm": 0.2766793668270111, "learning_rate": 1.808139537885814e-06, "loss": 0.7064447999000549, "step": 14395 }, { "epoch": 17.663803680981594, "grad_norm": 0.2602655291557312, "learning_rate": 1.8062659005834566e-06, "loss": 0.7020250558853149, "step": 14396 }, { "epoch": 17.665030674846626, "grad_norm": 0.2215147763490677, "learning_rate": 1.8043931981543749e-06, "loss": 0.413377046585083, "step": 14397 }, { "epoch": 17.666257668711655, "grad_norm": 0.3407859802246094, "learning_rate": 1.8025214306740423e-06, "loss": 0.7496203184127808, "step": 14398 }, { "epoch": 17.667484662576687, "grad_norm": 0.2658268213272095, "learning_rate": 1.8006505982179095e-06, "loss": 0.7156969308853149, "step": 14399 }, { "epoch": 17.66871165644172, "grad_norm": 0.27730366587638855, "learning_rate": 1.7987807008613854e-06, "loss": 0.682104229927063, "step": 14400 }, { "epoch": 17.669938650306747, "grad_norm": 0.26147767901420593, "learning_rate": 1.7969117386798379e-06, "loss": 0.47439491748809814, "step": 14401 }, { "epoch": 17.67116564417178, "grad_norm": 0.27708715200424194, "learning_rate": 1.7950437117486035e-06, "loss": 0.5937535762786865, "step": 14402 }, { "epoch": 17.67239263803681, "grad_norm": 0.2620975971221924, "learning_rate": 1.7931766201429723e-06, "loss": 0.4101699888706207, "step": 14403 }, { "epoch": 17.67361963190184, "grad_norm": 0.27248403429985046, "learning_rate": 1.7913104639382034e-06, "loss": 0.6348403096199036, "step": 14404 }, { "epoch": 17.67484662576687, "grad_norm": 0.2789841294288635, "learning_rate": 1.789445243209517e-06, "loss": 0.5949240922927856, "step": 14405 }, { "epoch": 17.6760736196319, "grad_norm": 0.2628888189792633, "learning_rate": 1.787580958032098e-06, "loss": 0.45168930292129517, "step": 14406 }, { "epoch": 17.677300613496932, "grad_norm": 0.2553688585758209, "learning_rate": 1.7857176084810828e-06, "loss": 0.6703063249588013, "step": 14407 }, { "epoch": 17.678527607361964, "grad_norm": 0.2887146770954132, "learning_rate": 1.783855194631584e-06, "loss": 0.5317137241363525, "step": 14408 }, { "epoch": 17.679754601226993, "grad_norm": 0.22660981118679047, "learning_rate": 1.7819937165586637e-06, "loss": 0.5763599276542664, "step": 14409 }, { "epoch": 17.680981595092025, "grad_norm": 0.26851749420166016, "learning_rate": 1.780133174337359e-06, "loss": 0.4394838213920593, "step": 14410 }, { "epoch": 17.682208588957057, "grad_norm": 0.27212029695510864, "learning_rate": 1.7782735680426655e-06, "loss": 0.5942716002464294, "step": 14411 }, { "epoch": 17.683435582822085, "grad_norm": 0.29270434379577637, "learning_rate": 1.7764148977495286e-06, "loss": 0.6551831960678101, "step": 14412 }, { "epoch": 17.684662576687117, "grad_norm": 0.23340660333633423, "learning_rate": 1.7745571635328723e-06, "loss": 0.5789006352424622, "step": 14413 }, { "epoch": 17.68588957055215, "grad_norm": 0.2515926957130432, "learning_rate": 1.7727003654675778e-06, "loss": 0.597957968711853, "step": 14414 }, { "epoch": 17.687116564417177, "grad_norm": 0.2698799967765808, "learning_rate": 1.7708445036284826e-06, "loss": 0.7288224697113037, "step": 14415 }, { "epoch": 17.68834355828221, "grad_norm": 0.263457328081131, "learning_rate": 1.7689895780903964e-06, "loss": 0.6287193298339844, "step": 14416 }, { "epoch": 17.689570552147238, "grad_norm": 0.28241053223609924, "learning_rate": 1.7671355889280816e-06, "loss": 0.618333101272583, "step": 14417 }, { "epoch": 17.69079754601227, "grad_norm": 0.23713575303554535, "learning_rate": 1.76528253621627e-06, "loss": 0.3304596245288849, "step": 14418 }, { "epoch": 17.692024539877302, "grad_norm": 0.27843257784843445, "learning_rate": 1.7634304200296493e-06, "loss": 0.5487096309661865, "step": 14419 }, { "epoch": 17.69325153374233, "grad_norm": 0.2896970212459564, "learning_rate": 1.7615792404428789e-06, "loss": 0.7352400422096252, "step": 14420 }, { "epoch": 17.694478527607362, "grad_norm": 0.2644232511520386, "learning_rate": 1.7597289975305714e-06, "loss": 0.666672945022583, "step": 14421 }, { "epoch": 17.69570552147239, "grad_norm": 0.22160853445529938, "learning_rate": 1.7578796913673034e-06, "loss": 0.30926448106765747, "step": 14422 }, { "epoch": 17.696932515337423, "grad_norm": 0.24156180024147034, "learning_rate": 1.7560313220276098e-06, "loss": 0.6096391677856445, "step": 14423 }, { "epoch": 17.698159509202455, "grad_norm": 0.27816241979599, "learning_rate": 1.7541838895860057e-06, "loss": 0.5655328631401062, "step": 14424 }, { "epoch": 17.699386503067483, "grad_norm": 0.2434120625257492, "learning_rate": 1.7523373941169541e-06, "loss": 0.5196170210838318, "step": 14425 }, { "epoch": 17.700613496932515, "grad_norm": 0.2635331451892853, "learning_rate": 1.750491835694873e-06, "loss": 0.4783024191856384, "step": 14426 }, { "epoch": 17.701840490797547, "grad_norm": 0.2635659873485565, "learning_rate": 1.7486472143941586e-06, "loss": 0.5331522822380066, "step": 14427 }, { "epoch": 17.703067484662576, "grad_norm": 0.24245093762874603, "learning_rate": 1.7468035302891568e-06, "loss": 0.6252617835998535, "step": 14428 }, { "epoch": 17.704294478527608, "grad_norm": 0.23542805016040802, "learning_rate": 1.7449607834541858e-06, "loss": 0.5733821392059326, "step": 14429 }, { "epoch": 17.70552147239264, "grad_norm": 0.25554269552230835, "learning_rate": 1.7431189739635228e-06, "loss": 0.6193656921386719, "step": 14430 }, { "epoch": 17.706748466257668, "grad_norm": 0.29763785004615784, "learning_rate": 1.7412781018914027e-06, "loss": 0.6570577621459961, "step": 14431 }, { "epoch": 17.7079754601227, "grad_norm": 0.2672852575778961, "learning_rate": 1.7394381673120242e-06, "loss": 0.5586692094802856, "step": 14432 }, { "epoch": 17.70920245398773, "grad_norm": 0.2807861566543579, "learning_rate": 1.7375991702995503e-06, "loss": 0.772005558013916, "step": 14433 }, { "epoch": 17.71042944785276, "grad_norm": 0.2668750286102295, "learning_rate": 1.7357611109281109e-06, "loss": 0.7570143938064575, "step": 14434 }, { "epoch": 17.711656441717793, "grad_norm": 0.2762105464935303, "learning_rate": 1.7339239892717908e-06, "loss": 0.632401168346405, "step": 14435 }, { "epoch": 17.71288343558282, "grad_norm": 0.2651897370815277, "learning_rate": 1.7320878054046336e-06, "loss": 0.6254444718360901, "step": 14436 }, { "epoch": 17.714110429447853, "grad_norm": 0.30766695737838745, "learning_rate": 1.730252559400658e-06, "loss": 0.6680042743682861, "step": 14437 }, { "epoch": 17.715337423312885, "grad_norm": 0.6888008117675781, "learning_rate": 1.7284182513338297e-06, "loss": 0.4033116400241852, "step": 14438 }, { "epoch": 17.716564417177914, "grad_norm": 0.27741289138793945, "learning_rate": 1.7265848812780928e-06, "loss": 0.5820201635360718, "step": 14439 }, { "epoch": 17.717791411042946, "grad_norm": 0.263315349817276, "learning_rate": 1.7247524493073431e-06, "loss": 0.5300512909889221, "step": 14440 }, { "epoch": 17.719018404907974, "grad_norm": 0.25898921489715576, "learning_rate": 1.7229209554954384e-06, "loss": 0.5288949012756348, "step": 14441 }, { "epoch": 17.720245398773006, "grad_norm": 0.24144957959651947, "learning_rate": 1.7210903999161976e-06, "loss": 0.5988331437110901, "step": 14442 }, { "epoch": 17.721472392638038, "grad_norm": 0.2512979507446289, "learning_rate": 1.7192607826434116e-06, "loss": 0.7105739116668701, "step": 14443 }, { "epoch": 17.722699386503066, "grad_norm": 0.27440232038497925, "learning_rate": 1.717432103750824e-06, "loss": 0.4024357795715332, "step": 14444 }, { "epoch": 17.7239263803681, "grad_norm": 0.2921546399593353, "learning_rate": 1.7156043633121481e-06, "loss": 0.7575478553771973, "step": 14445 }, { "epoch": 17.72515337423313, "grad_norm": 0.24694538116455078, "learning_rate": 1.713777561401045e-06, "loss": 0.5967926979064941, "step": 14446 }, { "epoch": 17.72638036809816, "grad_norm": 0.2890719473361969, "learning_rate": 1.711951698091155e-06, "loss": 0.5971429347991943, "step": 14447 }, { "epoch": 17.72760736196319, "grad_norm": 0.3042852282524109, "learning_rate": 1.710126773456075e-06, "loss": 0.643683135509491, "step": 14448 }, { "epoch": 17.72883435582822, "grad_norm": 0.2705894410610199, "learning_rate": 1.7083027875693631e-06, "loss": 0.6428991556167603, "step": 14449 }, { "epoch": 17.73006134969325, "grad_norm": 0.2516762614250183, "learning_rate": 1.7064797405045325e-06, "loss": 0.5717575550079346, "step": 14450 }, { "epoch": 17.731288343558283, "grad_norm": 0.2620484530925751, "learning_rate": 1.7046576323350661e-06, "loss": 0.4684022068977356, "step": 14451 }, { "epoch": 17.73251533742331, "grad_norm": 0.30806615948677063, "learning_rate": 1.7028364631344134e-06, "loss": 0.7536346912384033, "step": 14452 }, { "epoch": 17.733742331288344, "grad_norm": 0.2730661928653717, "learning_rate": 1.7010162329759743e-06, "loss": 0.49218887090682983, "step": 14453 }, { "epoch": 17.734969325153376, "grad_norm": 0.26885226368904114, "learning_rate": 1.699196941933126e-06, "loss": 0.49474549293518066, "step": 14454 }, { "epoch": 17.736196319018404, "grad_norm": 0.2716234028339386, "learning_rate": 1.6973785900791877e-06, "loss": 0.5910289287567139, "step": 14455 }, { "epoch": 17.737423312883436, "grad_norm": 0.2848872244358063, "learning_rate": 1.6955611774874592e-06, "loss": 0.5455286502838135, "step": 14456 }, { "epoch": 17.738650306748465, "grad_norm": 0.2808411717414856, "learning_rate": 1.69374470423119e-06, "loss": 0.7998709678649902, "step": 14457 }, { "epoch": 17.739877300613497, "grad_norm": 0.2624289095401764, "learning_rate": 1.6919291703836022e-06, "loss": 0.6524244546890259, "step": 14458 }, { "epoch": 17.74110429447853, "grad_norm": 0.30455687642097473, "learning_rate": 1.6901145760178788e-06, "loss": 0.7363672256469727, "step": 14459 }, { "epoch": 17.742331288343557, "grad_norm": 0.28727486729621887, "learning_rate": 1.6883009212071477e-06, "loss": 0.5129668116569519, "step": 14460 }, { "epoch": 17.74355828220859, "grad_norm": 0.29007336497306824, "learning_rate": 1.6864882060245223e-06, "loss": 0.3749275207519531, "step": 14461 }, { "epoch": 17.74478527607362, "grad_norm": 0.26407772302627563, "learning_rate": 1.684676430543064e-06, "loss": 0.46111637353897095, "step": 14462 }, { "epoch": 17.74601226993865, "grad_norm": 0.24980175495147705, "learning_rate": 1.6828655948358002e-06, "loss": 0.5749626159667969, "step": 14463 }, { "epoch": 17.74723926380368, "grad_norm": 0.2503575086593628, "learning_rate": 1.6810556989757253e-06, "loss": 0.6050692796707153, "step": 14464 }, { "epoch": 17.74846625766871, "grad_norm": 0.2976168990135193, "learning_rate": 1.679246743035784e-06, "loss": 0.6255719065666199, "step": 14465 }, { "epoch": 17.749693251533742, "grad_norm": 0.2517981231212616, "learning_rate": 1.6774387270888953e-06, "loss": 0.4583664536476135, "step": 14466 }, { "epoch": 17.750920245398774, "grad_norm": 0.2787110507488251, "learning_rate": 1.6756316512079318e-06, "loss": 0.6985753774642944, "step": 14467 }, { "epoch": 17.752147239263802, "grad_norm": 0.2829684019088745, "learning_rate": 1.673825515465735e-06, "loss": 0.5918815732002258, "step": 14468 }, { "epoch": 17.753374233128834, "grad_norm": 0.2527298033237457, "learning_rate": 1.6720203199351025e-06, "loss": 0.6950498819351196, "step": 14469 }, { "epoch": 17.754601226993866, "grad_norm": 0.28229647874832153, "learning_rate": 1.6702160646887955e-06, "loss": 0.4725314974784851, "step": 14470 }, { "epoch": 17.755828220858895, "grad_norm": 0.23890115320682526, "learning_rate": 1.6684127497995444e-06, "loss": 0.47889724373817444, "step": 14471 }, { "epoch": 17.757055214723927, "grad_norm": 0.27976393699645996, "learning_rate": 1.6666103753400275e-06, "loss": 0.779056191444397, "step": 14472 }, { "epoch": 17.758282208588955, "grad_norm": 0.2894750237464905, "learning_rate": 1.6648089413829032e-06, "loss": 0.8105239272117615, "step": 14473 }, { "epoch": 17.759509202453987, "grad_norm": 0.2877979278564453, "learning_rate": 1.6630084480007718e-06, "loss": 0.8243641257286072, "step": 14474 }, { "epoch": 17.76073619631902, "grad_norm": 0.2657652199268341, "learning_rate": 1.6612088952662113e-06, "loss": 0.65790194272995, "step": 14475 }, { "epoch": 17.761963190184048, "grad_norm": 0.2523839473724365, "learning_rate": 1.6594102832517554e-06, "loss": 0.582813024520874, "step": 14476 }, { "epoch": 17.76319018404908, "grad_norm": 0.24560768902301788, "learning_rate": 1.6576126120299045e-06, "loss": 0.5974304676055908, "step": 14477 }, { "epoch": 17.764417177914112, "grad_norm": 0.27486369013786316, "learning_rate": 1.6558158816731144e-06, "loss": 0.6215538382530212, "step": 14478 }, { "epoch": 17.76564417177914, "grad_norm": 0.2607943117618561, "learning_rate": 1.6540200922538052e-06, "loss": 0.4282832145690918, "step": 14479 }, { "epoch": 17.766871165644172, "grad_norm": 0.28973671793937683, "learning_rate": 1.6522252438443604e-06, "loss": 0.5674418210983276, "step": 14480 }, { "epoch": 17.7680981595092, "grad_norm": 0.25997284054756165, "learning_rate": 1.6504313365171247e-06, "loss": 0.4637855887413025, "step": 14481 }, { "epoch": 17.769325153374233, "grad_norm": 0.28390493988990784, "learning_rate": 1.6486383703444074e-06, "loss": 0.5710793733596802, "step": 14482 }, { "epoch": 17.770552147239265, "grad_norm": 0.2710730731487274, "learning_rate": 1.6468463453984838e-06, "loss": 0.47080177068710327, "step": 14483 }, { "epoch": 17.771779141104293, "grad_norm": 0.2949327230453491, "learning_rate": 1.6450552617515712e-06, "loss": 0.7702762484550476, "step": 14484 }, { "epoch": 17.773006134969325, "grad_norm": 0.28098398447036743, "learning_rate": 1.643265119475873e-06, "loss": 0.5846347212791443, "step": 14485 }, { "epoch": 17.774233128834357, "grad_norm": 0.27663174271583557, "learning_rate": 1.6414759186435424e-06, "loss": 0.6966352462768555, "step": 14486 }, { "epoch": 17.775460122699386, "grad_norm": 0.24636676907539368, "learning_rate": 1.639687659326694e-06, "loss": 0.589970588684082, "step": 14487 }, { "epoch": 17.776687116564418, "grad_norm": 0.2559802234172821, "learning_rate": 1.6379003415974175e-06, "loss": 0.5076272487640381, "step": 14488 }, { "epoch": 17.77791411042945, "grad_norm": 0.25797852873802185, "learning_rate": 1.6361139655277414e-06, "loss": 0.5841696262359619, "step": 14489 }, { "epoch": 17.779141104294478, "grad_norm": 0.29553401470184326, "learning_rate": 1.6343285311896717e-06, "loss": 0.5014752149581909, "step": 14490 }, { "epoch": 17.78036809815951, "grad_norm": 0.2689141631126404, "learning_rate": 1.6325440386551843e-06, "loss": 0.5869241952896118, "step": 14491 }, { "epoch": 17.78159509202454, "grad_norm": 0.2589794099330902, "learning_rate": 1.6307604879962025e-06, "loss": 0.7023707628250122, "step": 14492 }, { "epoch": 17.78282208588957, "grad_norm": 0.3093288540840149, "learning_rate": 1.62897787928461e-06, "loss": 0.687326192855835, "step": 14493 }, { "epoch": 17.784049079754602, "grad_norm": 0.28463014960289, "learning_rate": 1.6271962125922635e-06, "loss": 0.5737600326538086, "step": 14494 }, { "epoch": 17.78527607361963, "grad_norm": 0.2662215530872345, "learning_rate": 1.6254154879909778e-06, "loss": 0.5251301527023315, "step": 14495 }, { "epoch": 17.786503067484663, "grad_norm": 0.2652130424976349, "learning_rate": 1.6236357055525258e-06, "loss": 0.6468573212623596, "step": 14496 }, { "epoch": 17.787730061349695, "grad_norm": 0.29214316606521606, "learning_rate": 1.6218568653486504e-06, "loss": 0.6550905704498291, "step": 14497 }, { "epoch": 17.788957055214723, "grad_norm": 0.2671346664428711, "learning_rate": 1.620078967451047e-06, "loss": 0.5882951617240906, "step": 14498 }, { "epoch": 17.790184049079755, "grad_norm": 0.26868271827697754, "learning_rate": 1.6183020119313746e-06, "loss": 0.6404262185096741, "step": 14499 }, { "epoch": 17.791411042944784, "grad_norm": 0.28620389103889465, "learning_rate": 1.6165259988612652e-06, "loss": 0.5554099678993225, "step": 14500 }, { "epoch": 17.792638036809816, "grad_norm": 0.3018434941768646, "learning_rate": 1.6147509283123003e-06, "loss": 0.692064642906189, "step": 14501 }, { "epoch": 17.793865030674848, "grad_norm": 0.26555582880973816, "learning_rate": 1.6129768003560253e-06, "loss": 0.5606411695480347, "step": 14502 }, { "epoch": 17.795092024539876, "grad_norm": 0.26493602991104126, "learning_rate": 1.611203615063961e-06, "loss": 0.5916208624839783, "step": 14503 }, { "epoch": 17.79631901840491, "grad_norm": 0.2802395522594452, "learning_rate": 1.6094313725075666e-06, "loss": 0.43546223640441895, "step": 14504 }, { "epoch": 17.79754601226994, "grad_norm": 0.25258538126945496, "learning_rate": 1.6076600727582824e-06, "loss": 0.539810836315155, "step": 14505 }, { "epoch": 17.79877300613497, "grad_norm": 0.25099050998687744, "learning_rate": 1.6058897158875042e-06, "loss": 0.43675804138183594, "step": 14506 }, { "epoch": 17.8, "grad_norm": 0.36962538957595825, "learning_rate": 1.6041203019665886e-06, "loss": 0.5866479873657227, "step": 14507 }, { "epoch": 17.80122699386503, "grad_norm": 0.2473585307598114, "learning_rate": 1.6023518310668618e-06, "loss": 0.4072653651237488, "step": 14508 }, { "epoch": 17.80245398773006, "grad_norm": 0.2429029494524002, "learning_rate": 1.6005843032595947e-06, "loss": 0.42757776379585266, "step": 14509 }, { "epoch": 17.803680981595093, "grad_norm": 0.2404707968235016, "learning_rate": 1.5988177186160385e-06, "loss": 0.487602561712265, "step": 14510 }, { "epoch": 17.80490797546012, "grad_norm": 0.2901292145252228, "learning_rate": 1.597052077207395e-06, "loss": 0.6432065963745117, "step": 14511 }, { "epoch": 17.806134969325154, "grad_norm": 0.25490713119506836, "learning_rate": 1.5952873791048373e-06, "loss": 0.5464239120483398, "step": 14512 }, { "epoch": 17.807361963190186, "grad_norm": 0.29172468185424805, "learning_rate": 1.5935236243794922e-06, "loss": 0.535041093826294, "step": 14513 }, { "epoch": 17.808588957055214, "grad_norm": 0.32893475890159607, "learning_rate": 1.5917608131024552e-06, "loss": 0.6691635251045227, "step": 14514 }, { "epoch": 17.809815950920246, "grad_norm": 0.25528624653816223, "learning_rate": 1.5899989453447728e-06, "loss": 0.40936315059661865, "step": 14515 }, { "epoch": 17.811042944785274, "grad_norm": 0.2819651663303375, "learning_rate": 1.5882380211774683e-06, "loss": 0.7845460772514343, "step": 14516 }, { "epoch": 17.812269938650306, "grad_norm": 0.2538367211818695, "learning_rate": 1.5864780406715213e-06, "loss": 0.5226367712020874, "step": 14517 }, { "epoch": 17.81349693251534, "grad_norm": 0.26220065355300903, "learning_rate": 1.584719003897861e-06, "loss": 0.47917234897613525, "step": 14518 }, { "epoch": 17.814723926380367, "grad_norm": 0.2652464807033539, "learning_rate": 1.5829609109273973e-06, "loss": 0.5523488521575928, "step": 14519 }, { "epoch": 17.8159509202454, "grad_norm": 0.2618599534034729, "learning_rate": 1.5812037618309905e-06, "loss": 0.48545190691947937, "step": 14520 }, { "epoch": 17.81717791411043, "grad_norm": 0.2567989230155945, "learning_rate": 1.579447556679467e-06, "loss": 0.5344886779785156, "step": 14521 }, { "epoch": 17.81840490797546, "grad_norm": 0.2714916467666626, "learning_rate": 1.5776922955436203e-06, "loss": 0.7741023302078247, "step": 14522 }, { "epoch": 17.81963190184049, "grad_norm": 0.2299203872680664, "learning_rate": 1.5759379784941907e-06, "loss": 0.5713906288146973, "step": 14523 }, { "epoch": 17.82085889570552, "grad_norm": 0.3314440846443176, "learning_rate": 1.5741846056018917e-06, "loss": 0.6644710302352905, "step": 14524 }, { "epoch": 17.822085889570552, "grad_norm": 0.24096408486366272, "learning_rate": 1.5724321769374023e-06, "loss": 0.3701532483100891, "step": 14525 }, { "epoch": 17.823312883435584, "grad_norm": 0.26719456911087036, "learning_rate": 1.5706806925713524e-06, "loss": 0.5712520480155945, "step": 14526 }, { "epoch": 17.824539877300612, "grad_norm": 0.2500896453857422, "learning_rate": 1.5689301525743438e-06, "loss": 0.3234938979148865, "step": 14527 }, { "epoch": 17.825766871165644, "grad_norm": 0.311578631401062, "learning_rate": 1.5671805570169312e-06, "loss": 0.7870638966560364, "step": 14528 }, { "epoch": 17.826993865030676, "grad_norm": 0.27789121866226196, "learning_rate": 1.565431905969636e-06, "loss": 0.5378883481025696, "step": 14529 }, { "epoch": 17.828220858895705, "grad_norm": 0.2626987099647522, "learning_rate": 1.563684199502946e-06, "loss": 0.6227214336395264, "step": 14530 }, { "epoch": 17.829447852760737, "grad_norm": 0.24920257925987244, "learning_rate": 1.5619374376872997e-06, "loss": 0.2821338474750519, "step": 14531 }, { "epoch": 17.830674846625765, "grad_norm": 0.2617615759372711, "learning_rate": 1.5601916205931155e-06, "loss": 0.5795203447341919, "step": 14532 }, { "epoch": 17.831901840490797, "grad_norm": 0.27603378891944885, "learning_rate": 1.5584467482907482e-06, "loss": 0.6474769711494446, "step": 14533 }, { "epoch": 17.83312883435583, "grad_norm": 0.2555461823940277, "learning_rate": 1.5567028208505334e-06, "loss": 0.5072534680366516, "step": 14534 }, { "epoch": 17.834355828220858, "grad_norm": 0.275061696767807, "learning_rate": 1.5549598383427677e-06, "loss": 0.7106829881668091, "step": 14535 }, { "epoch": 17.83558282208589, "grad_norm": 0.26190558075904846, "learning_rate": 1.5532178008377058e-06, "loss": 0.5646058320999146, "step": 14536 }, { "epoch": 17.83680981595092, "grad_norm": 0.3063841760158539, "learning_rate": 1.551476708405561e-06, "loss": 0.6638450622558594, "step": 14537 }, { "epoch": 17.83803680981595, "grad_norm": 0.25269055366516113, "learning_rate": 1.5497365611165133e-06, "loss": 0.35852086544036865, "step": 14538 }, { "epoch": 17.839263803680982, "grad_norm": 0.2659214437007904, "learning_rate": 1.5479973590407009e-06, "loss": 0.6507385969161987, "step": 14539 }, { "epoch": 17.84049079754601, "grad_norm": 0.28474104404449463, "learning_rate": 1.5462591022482264e-06, "loss": 0.5481491684913635, "step": 14540 }, { "epoch": 17.841717791411043, "grad_norm": 0.2880687713623047, "learning_rate": 1.5445217908091613e-06, "loss": 0.5938142538070679, "step": 14541 }, { "epoch": 17.842944785276075, "grad_norm": 0.2858433127403259, "learning_rate": 1.542785424793522e-06, "loss": 0.4925382137298584, "step": 14542 }, { "epoch": 17.844171779141103, "grad_norm": 0.3114153742790222, "learning_rate": 1.5410500042712994e-06, "loss": 0.6614712476730347, "step": 14543 }, { "epoch": 17.845398773006135, "grad_norm": 0.2497471421957016, "learning_rate": 1.5393155293124435e-06, "loss": 0.6087071895599365, "step": 14544 }, { "epoch": 17.846625766871167, "grad_norm": 0.24536079168319702, "learning_rate": 1.5375819999868674e-06, "loss": 0.590156078338623, "step": 14545 }, { "epoch": 17.847852760736195, "grad_norm": 0.2518615126609802, "learning_rate": 1.5358494163644487e-06, "loss": 0.46484866738319397, "step": 14546 }, { "epoch": 17.849079754601227, "grad_norm": 0.28139856457710266, "learning_rate": 1.534117778515015e-06, "loss": 0.580226480960846, "step": 14547 }, { "epoch": 17.85030674846626, "grad_norm": 0.2756141722202301, "learning_rate": 1.5323870865083634e-06, "loss": 0.5730589032173157, "step": 14548 }, { "epoch": 17.851533742331288, "grad_norm": 0.26249977946281433, "learning_rate": 1.53065734041426e-06, "loss": 0.5200498104095459, "step": 14549 }, { "epoch": 17.85276073619632, "grad_norm": 0.29090723395347595, "learning_rate": 1.5289285403024211e-06, "loss": 0.5983284115791321, "step": 14550 }, { "epoch": 17.85398773006135, "grad_norm": 0.25745999813079834, "learning_rate": 1.5272006862425332e-06, "loss": 0.5472363233566284, "step": 14551 }, { "epoch": 17.85521472392638, "grad_norm": 0.23941321671009064, "learning_rate": 1.5254737783042373e-06, "loss": 0.5717787742614746, "step": 14552 }, { "epoch": 17.856441717791412, "grad_norm": 0.29118725657463074, "learning_rate": 1.5237478165571418e-06, "loss": 0.6209118366241455, "step": 14553 }, { "epoch": 17.85766871165644, "grad_norm": 0.2942374050617218, "learning_rate": 1.522022801070816e-06, "loss": 0.6844048500061035, "step": 14554 }, { "epoch": 17.858895705521473, "grad_norm": 0.2654740512371063, "learning_rate": 1.5202987319147905e-06, "loss": 0.4786403775215149, "step": 14555 }, { "epoch": 17.860122699386505, "grad_norm": 0.28547996282577515, "learning_rate": 1.5185756091585596e-06, "loss": 0.654380738735199, "step": 14556 }, { "epoch": 17.861349693251533, "grad_norm": 0.24950368702411652, "learning_rate": 1.516853432871565e-06, "loss": 0.6516945362091064, "step": 14557 }, { "epoch": 17.862576687116565, "grad_norm": 0.3078065812587738, "learning_rate": 1.51513220312324e-06, "loss": 0.8418989181518555, "step": 14558 }, { "epoch": 17.863803680981594, "grad_norm": 0.30534765124320984, "learning_rate": 1.5134119199829516e-06, "loss": 0.6099573969841003, "step": 14559 }, { "epoch": 17.865030674846626, "grad_norm": 0.2472127079963684, "learning_rate": 1.5116925835200496e-06, "loss": 0.5963634848594666, "step": 14560 }, { "epoch": 17.866257668711658, "grad_norm": 0.2644025683403015, "learning_rate": 1.509974193803823e-06, "loss": 0.596381664276123, "step": 14561 }, { "epoch": 17.867484662576686, "grad_norm": 0.244345560669899, "learning_rate": 1.5082567509035417e-06, "loss": 0.6145912408828735, "step": 14562 }, { "epoch": 17.868711656441718, "grad_norm": 0.293698251247406, "learning_rate": 1.5065402548884306e-06, "loss": 0.6953533887863159, "step": 14563 }, { "epoch": 17.86993865030675, "grad_norm": 0.3028424084186554, "learning_rate": 1.504824705827676e-06, "loss": 0.6375309824943542, "step": 14564 }, { "epoch": 17.87116564417178, "grad_norm": 0.28665441274642944, "learning_rate": 1.503110103790431e-06, "loss": 0.7814404368400574, "step": 14565 }, { "epoch": 17.87239263803681, "grad_norm": 0.25935009121894836, "learning_rate": 1.5013964488457988e-06, "loss": 0.4439695477485657, "step": 14566 }, { "epoch": 17.87361963190184, "grad_norm": 0.2631010413169861, "learning_rate": 1.4996837410628545e-06, "loss": 0.6866264343261719, "step": 14567 }, { "epoch": 17.87484662576687, "grad_norm": 0.24359236657619476, "learning_rate": 1.4979719805106345e-06, "loss": 0.5527574419975281, "step": 14568 }, { "epoch": 17.876073619631903, "grad_norm": 0.24687421321868896, "learning_rate": 1.496261167258134e-06, "loss": 0.5957076549530029, "step": 14569 }, { "epoch": 17.87730061349693, "grad_norm": 0.23686599731445312, "learning_rate": 1.4945513013743167e-06, "loss": 0.5661089420318604, "step": 14570 }, { "epoch": 17.878527607361963, "grad_norm": 0.26750171184539795, "learning_rate": 1.4928423829280918e-06, "loss": 0.6328824758529663, "step": 14571 }, { "epoch": 17.879754601226995, "grad_norm": 0.29372668266296387, "learning_rate": 1.491134411988343e-06, "loss": 0.6985042691230774, "step": 14572 }, { "epoch": 17.880981595092024, "grad_norm": 0.27370312809944153, "learning_rate": 1.4894273886239208e-06, "loss": 0.5453335046768188, "step": 14573 }, { "epoch": 17.882208588957056, "grad_norm": 0.25438982248306274, "learning_rate": 1.4877213129036255e-06, "loss": 0.6645020246505737, "step": 14574 }, { "epoch": 17.883435582822084, "grad_norm": 0.26911574602127075, "learning_rate": 1.4860161848962272e-06, "loss": 0.6708583831787109, "step": 14575 }, { "epoch": 17.884662576687116, "grad_norm": 0.3031774163246155, "learning_rate": 1.4843120046704489e-06, "loss": 0.6086454391479492, "step": 14576 }, { "epoch": 17.88588957055215, "grad_norm": 0.26895734667778015, "learning_rate": 1.4826087722949854e-06, "loss": 0.6470022201538086, "step": 14577 }, { "epoch": 17.887116564417177, "grad_norm": 0.2590526342391968, "learning_rate": 1.4809064878384877e-06, "loss": 0.6471710205078125, "step": 14578 }, { "epoch": 17.88834355828221, "grad_norm": 0.23606297373771667, "learning_rate": 1.4792051513695727e-06, "loss": 0.47087231278419495, "step": 14579 }, { "epoch": 17.88957055214724, "grad_norm": 0.30575573444366455, "learning_rate": 1.4775047629568135e-06, "loss": 0.3846961557865143, "step": 14580 }, { "epoch": 17.89079754601227, "grad_norm": 0.2800881564617157, "learning_rate": 1.4758053226687496e-06, "loss": 0.5879099369049072, "step": 14581 }, { "epoch": 17.8920245398773, "grad_norm": 0.2551717162132263, "learning_rate": 1.4741068305738792e-06, "loss": 0.632975161075592, "step": 14582 }, { "epoch": 17.89325153374233, "grad_norm": 0.2893374264240265, "learning_rate": 1.4724092867406669e-06, "loss": 0.6257842779159546, "step": 14583 }, { "epoch": 17.89447852760736, "grad_norm": 0.2591066360473633, "learning_rate": 1.470712691237533e-06, "loss": 0.6154621243476868, "step": 14584 }, { "epoch": 17.895705521472394, "grad_norm": 0.2389405518770218, "learning_rate": 1.4690170441328616e-06, "loss": 0.5803853273391724, "step": 14585 }, { "epoch": 17.896932515337422, "grad_norm": 0.26036345958709717, "learning_rate": 1.4673223454950007e-06, "loss": 0.6107214689254761, "step": 14586 }, { "epoch": 17.898159509202454, "grad_norm": 0.27903497219085693, "learning_rate": 1.46562859539226e-06, "loss": 0.6098342537879944, "step": 14587 }, { "epoch": 17.899386503067486, "grad_norm": 0.2447638064622879, "learning_rate": 1.4639357938929066e-06, "loss": 0.5947643518447876, "step": 14588 }, { "epoch": 17.900613496932515, "grad_norm": 0.26714345812797546, "learning_rate": 1.462243941065178e-06, "loss": 0.6195263862609863, "step": 14589 }, { "epoch": 17.901840490797547, "grad_norm": 0.24291105568408966, "learning_rate": 1.4605530369772608e-06, "loss": 0.512403130531311, "step": 14590 }, { "epoch": 17.903067484662575, "grad_norm": 0.2616630494594574, "learning_rate": 1.4588630816973147e-06, "loss": 0.6388986110687256, "step": 14591 }, { "epoch": 17.904294478527607, "grad_norm": 0.28447410464286804, "learning_rate": 1.4571740752934576e-06, "loss": 0.6741594076156616, "step": 14592 }, { "epoch": 17.90552147239264, "grad_norm": 0.3001502752304077, "learning_rate": 1.4554860178337653e-06, "loss": 0.8494046330451965, "step": 14593 }, { "epoch": 17.906748466257667, "grad_norm": 0.2859189808368683, "learning_rate": 1.4537989093862863e-06, "loss": 0.5723183155059814, "step": 14594 }, { "epoch": 17.9079754601227, "grad_norm": 0.25693708658218384, "learning_rate": 1.4521127500190107e-06, "loss": 0.42384105920791626, "step": 14595 }, { "epoch": 17.90920245398773, "grad_norm": 0.2814970910549164, "learning_rate": 1.4504275397999118e-06, "loss": 0.5663703083992004, "step": 14596 }, { "epoch": 17.91042944785276, "grad_norm": 0.2637481093406677, "learning_rate": 1.448743278796913e-06, "loss": 0.6658276319503784, "step": 14597 }, { "epoch": 17.911656441717792, "grad_norm": 0.2559933662414551, "learning_rate": 1.4470599670779016e-06, "loss": 0.5418700575828552, "step": 14598 }, { "epoch": 17.91288343558282, "grad_norm": 0.2136484682559967, "learning_rate": 1.445377604710732e-06, "loss": 0.37508541345596313, "step": 14599 }, { "epoch": 17.914110429447852, "grad_norm": 0.25122955441474915, "learning_rate": 1.4436961917632081e-06, "loss": 0.5960754156112671, "step": 14600 }, { "epoch": 17.915337423312884, "grad_norm": 0.2816019654273987, "learning_rate": 1.4420157283031006e-06, "loss": 0.6493737697601318, "step": 14601 }, { "epoch": 17.916564417177913, "grad_norm": 0.2641134262084961, "learning_rate": 1.4403362143981553e-06, "loss": 0.7002406716346741, "step": 14602 }, { "epoch": 17.917791411042945, "grad_norm": 0.24316608905792236, "learning_rate": 1.4386576501160653e-06, "loss": 0.6797574758529663, "step": 14603 }, { "epoch": 17.919018404907977, "grad_norm": 0.27621063590049744, "learning_rate": 1.4369800355244822e-06, "loss": 0.6116088032722473, "step": 14604 }, { "epoch": 17.920245398773005, "grad_norm": 0.30401498079299927, "learning_rate": 1.4353033706910296e-06, "loss": 0.5569919347763062, "step": 14605 }, { "epoch": 17.921472392638037, "grad_norm": 0.2975943982601166, "learning_rate": 1.4336276556832923e-06, "loss": 0.5165454745292664, "step": 14606 }, { "epoch": 17.92269938650307, "grad_norm": 0.2946213483810425, "learning_rate": 1.4319528905688078e-06, "loss": 0.5460560917854309, "step": 14607 }, { "epoch": 17.923926380368098, "grad_norm": 0.2377009093761444, "learning_rate": 1.4302790754150863e-06, "loss": 0.47542837262153625, "step": 14608 }, { "epoch": 17.92515337423313, "grad_norm": 0.23359425365924835, "learning_rate": 1.4286062102895957e-06, "loss": 0.44849640130996704, "step": 14609 }, { "epoch": 17.926380368098158, "grad_norm": 0.262854665517807, "learning_rate": 1.4269342952597575e-06, "loss": 0.7347406148910522, "step": 14610 }, { "epoch": 17.92760736196319, "grad_norm": 0.2657775580883026, "learning_rate": 1.4252633303929647e-06, "loss": 0.5693353414535522, "step": 14611 }, { "epoch": 17.928834355828222, "grad_norm": 0.31792959570884705, "learning_rate": 1.423593315756569e-06, "loss": 0.5038333535194397, "step": 14612 }, { "epoch": 17.93006134969325, "grad_norm": 0.24880990386009216, "learning_rate": 1.421924251417886e-06, "loss": 0.4884534478187561, "step": 14613 }, { "epoch": 17.931288343558283, "grad_norm": 0.24555426836013794, "learning_rate": 1.4202561374441926e-06, "loss": 0.6830617189407349, "step": 14614 }, { "epoch": 17.93251533742331, "grad_norm": 0.2723620533943176, "learning_rate": 1.418588973902721e-06, "loss": 0.6093084216117859, "step": 14615 }, { "epoch": 17.933742331288343, "grad_norm": 0.2709653377532959, "learning_rate": 1.41692276086067e-06, "loss": 0.4717769920825958, "step": 14616 }, { "epoch": 17.934969325153375, "grad_norm": 0.23278799653053284, "learning_rate": 1.4152574983852e-06, "loss": 0.48925477266311646, "step": 14617 }, { "epoch": 17.936196319018403, "grad_norm": 0.2678217589855194, "learning_rate": 1.4135931865434353e-06, "loss": 0.5740103721618652, "step": 14618 }, { "epoch": 17.937423312883435, "grad_norm": 0.24464988708496094, "learning_rate": 1.4119298254024638e-06, "loss": 0.6216524243354797, "step": 14619 }, { "epoch": 17.938650306748468, "grad_norm": 0.2594940960407257, "learning_rate": 1.4102674150293232e-06, "loss": 0.5685508251190186, "step": 14620 }, { "epoch": 17.939877300613496, "grad_norm": 0.2794358730316162, "learning_rate": 1.4086059554910185e-06, "loss": 0.805902898311615, "step": 14621 }, { "epoch": 17.941104294478528, "grad_norm": 0.27045318484306335, "learning_rate": 1.4069454468545268e-06, "loss": 0.5640958547592163, "step": 14622 }, { "epoch": 17.94233128834356, "grad_norm": 0.24375905096530914, "learning_rate": 1.4052858891867749e-06, "loss": 0.3431481122970581, "step": 14623 }, { "epoch": 17.94355828220859, "grad_norm": 0.2661164104938507, "learning_rate": 1.403627282554651e-06, "loss": 0.4697468876838684, "step": 14624 }, { "epoch": 17.94478527607362, "grad_norm": 0.23826119303703308, "learning_rate": 1.4019696270250153e-06, "loss": 0.39586636424064636, "step": 14625 }, { "epoch": 17.94601226993865, "grad_norm": 0.29387423396110535, "learning_rate": 1.4003129226646789e-06, "loss": 0.637994110584259, "step": 14626 }, { "epoch": 17.94723926380368, "grad_norm": 0.23277923464775085, "learning_rate": 1.398657169540421e-06, "loss": 0.508876383304596, "step": 14627 }, { "epoch": 17.948466257668713, "grad_norm": 0.24642880260944366, "learning_rate": 1.397002367718983e-06, "loss": 0.30661246180534363, "step": 14628 }, { "epoch": 17.94969325153374, "grad_norm": 0.3073272109031677, "learning_rate": 1.3953485172670588e-06, "loss": 0.5154626369476318, "step": 14629 }, { "epoch": 17.950920245398773, "grad_norm": 0.29507333040237427, "learning_rate": 1.3936956182513116e-06, "loss": 0.5971220135688782, "step": 14630 }, { "epoch": 17.952147239263805, "grad_norm": 0.3006948232650757, "learning_rate": 1.3920436707383688e-06, "loss": 0.7919573783874512, "step": 14631 }, { "epoch": 17.953374233128834, "grad_norm": 0.22684748470783234, "learning_rate": 1.3903926747948131e-06, "loss": 0.5472062826156616, "step": 14632 }, { "epoch": 17.954601226993866, "grad_norm": 0.2662692070007324, "learning_rate": 1.3887426304871943e-06, "loss": 0.5541741847991943, "step": 14633 }, { "epoch": 17.955828220858894, "grad_norm": 0.3192005455493927, "learning_rate": 1.3870935378820143e-06, "loss": 0.41688698530197144, "step": 14634 }, { "epoch": 17.957055214723926, "grad_norm": 0.27690204977989197, "learning_rate": 1.385445397045751e-06, "loss": 0.5172463059425354, "step": 14635 }, { "epoch": 17.958282208588958, "grad_norm": 0.2813359797000885, "learning_rate": 1.3837982080448286e-06, "loss": 0.5424720048904419, "step": 14636 }, { "epoch": 17.959509202453987, "grad_norm": 0.3012687861919403, "learning_rate": 1.38215197094565e-06, "loss": 0.5160753726959229, "step": 14637 }, { "epoch": 17.96073619631902, "grad_norm": 0.2561817467212677, "learning_rate": 1.3805066858145642e-06, "loss": 0.6985805034637451, "step": 14638 }, { "epoch": 17.96196319018405, "grad_norm": 0.2622375190258026, "learning_rate": 1.3788623527178884e-06, "loss": 0.5551583766937256, "step": 14639 }, { "epoch": 17.96319018404908, "grad_norm": 0.31576672196388245, "learning_rate": 1.3772189717218997e-06, "loss": 0.5932618379592896, "step": 14640 }, { "epoch": 17.96441717791411, "grad_norm": 0.3014386296272278, "learning_rate": 1.3755765428928397e-06, "loss": 0.5189095735549927, "step": 14641 }, { "epoch": 17.96564417177914, "grad_norm": 0.28893518447875977, "learning_rate": 1.3739350662969137e-06, "loss": 0.6193859577178955, "step": 14642 }, { "epoch": 17.96687116564417, "grad_norm": 0.2829989790916443, "learning_rate": 1.3722945420002826e-06, "loss": 0.6053096055984497, "step": 14643 }, { "epoch": 17.968098159509204, "grad_norm": 0.24034005403518677, "learning_rate": 1.3706549700690658e-06, "loss": 0.48862046003341675, "step": 14644 }, { "epoch": 17.969325153374232, "grad_norm": 0.2677293121814728, "learning_rate": 1.3690163505693547e-06, "loss": 0.615605354309082, "step": 14645 }, { "epoch": 17.970552147239264, "grad_norm": 0.2659146785736084, "learning_rate": 1.3673786835671943e-06, "loss": 0.6540563106536865, "step": 14646 }, { "epoch": 17.971779141104296, "grad_norm": 0.29513975977897644, "learning_rate": 1.3657419691286033e-06, "loss": 0.6881181001663208, "step": 14647 }, { "epoch": 17.973006134969324, "grad_norm": 0.277912437915802, "learning_rate": 1.3641062073195431e-06, "loss": 0.4790077805519104, "step": 14648 }, { "epoch": 17.974233128834356, "grad_norm": 0.26986825466156006, "learning_rate": 1.36247139820595e-06, "loss": 0.4128812551498413, "step": 14649 }, { "epoch": 17.975460122699385, "grad_norm": 0.26863521337509155, "learning_rate": 1.3608375418537184e-06, "loss": 0.6131393909454346, "step": 14650 }, { "epoch": 17.976687116564417, "grad_norm": 0.2878187298774719, "learning_rate": 1.3592046383287038e-06, "loss": 0.7043393850326538, "step": 14651 }, { "epoch": 17.97791411042945, "grad_norm": 0.2617773413658142, "learning_rate": 1.3575726876967287e-06, "loss": 0.57868492603302, "step": 14652 }, { "epoch": 17.979141104294477, "grad_norm": 0.257463276386261, "learning_rate": 1.3559416900235656e-06, "loss": 0.6122031211853027, "step": 14653 }, { "epoch": 17.98036809815951, "grad_norm": 0.2614612281322479, "learning_rate": 1.354311645374956e-06, "loss": 0.7196162939071655, "step": 14654 }, { "epoch": 17.98159509202454, "grad_norm": 0.2789611220359802, "learning_rate": 1.352682553816606e-06, "loss": 0.6364153623580933, "step": 14655 }, { "epoch": 17.98282208588957, "grad_norm": 0.28908681869506836, "learning_rate": 1.3510544154141769e-06, "loss": 0.6205997467041016, "step": 14656 }, { "epoch": 17.9840490797546, "grad_norm": 0.2980802655220032, "learning_rate": 1.3494272302332994e-06, "loss": 0.655971884727478, "step": 14657 }, { "epoch": 17.98527607361963, "grad_norm": 0.257820725440979, "learning_rate": 1.3478009983395546e-06, "loss": 0.5972291827201843, "step": 14658 }, { "epoch": 17.986503067484662, "grad_norm": 0.2680995762348175, "learning_rate": 1.3461757197984925e-06, "loss": 0.6842451095581055, "step": 14659 }, { "epoch": 17.987730061349694, "grad_norm": 0.27518877387046814, "learning_rate": 1.3445513946756222e-06, "loss": 0.6312453746795654, "step": 14660 }, { "epoch": 17.988957055214723, "grad_norm": 0.27443355321884155, "learning_rate": 1.3429280230364216e-06, "loss": 0.7069110870361328, "step": 14661 }, { "epoch": 17.990184049079755, "grad_norm": 0.285795658826828, "learning_rate": 1.341305604946322e-06, "loss": 0.6982868909835815, "step": 14662 }, { "epoch": 17.991411042944787, "grad_norm": 0.2761024534702301, "learning_rate": 1.3396841404707122e-06, "loss": 0.6290806531906128, "step": 14663 }, { "epoch": 17.992638036809815, "grad_norm": 0.26107197999954224, "learning_rate": 1.3380636296749544e-06, "loss": 0.6504873633384705, "step": 14664 }, { "epoch": 17.993865030674847, "grad_norm": 0.2861160635948181, "learning_rate": 1.3364440726243654e-06, "loss": 0.7837731838226318, "step": 14665 }, { "epoch": 17.99509202453988, "grad_norm": 0.25738802552223206, "learning_rate": 1.3348254693842238e-06, "loss": 0.6905242204666138, "step": 14666 }, { "epoch": 17.996319018404908, "grad_norm": 0.24866415560245514, "learning_rate": 1.3332078200197774e-06, "loss": 0.5362913608551025, "step": 14667 }, { "epoch": 17.99754601226994, "grad_norm": 0.2875867187976837, "learning_rate": 1.331591124596218e-06, "loss": 0.6967638731002808, "step": 14668 }, { "epoch": 17.998773006134968, "grad_norm": 0.3052618205547333, "learning_rate": 1.3299753831787192e-06, "loss": 0.7811344861984253, "step": 14669 }, { "epoch": 18.0, "grad_norm": 0.30326345562934875, "learning_rate": 1.3283605958324008e-06, "loss": 0.44550877809524536, "step": 14670 }, { "epoch": 18.001226993865032, "grad_norm": 0.27468201518058777, "learning_rate": 1.3267467626223606e-06, "loss": 0.5229916572570801, "step": 14671 }, { "epoch": 18.00245398773006, "grad_norm": 0.2245272397994995, "learning_rate": 1.3251338836136328e-06, "loss": 0.5883045196533203, "step": 14672 }, { "epoch": 18.003680981595092, "grad_norm": 0.26401230692863464, "learning_rate": 1.3235219588712377e-06, "loss": 0.4869281053543091, "step": 14673 }, { "epoch": 18.004907975460124, "grad_norm": 0.2918539345264435, "learning_rate": 1.3219109884601454e-06, "loss": 0.5983865261077881, "step": 14674 }, { "epoch": 18.006134969325153, "grad_norm": 0.24663130939006805, "learning_rate": 1.3203009724452876e-06, "loss": 0.4972448945045471, "step": 14675 }, { "epoch": 18.007361963190185, "grad_norm": 0.2538181245326996, "learning_rate": 1.3186919108915675e-06, "loss": 0.5849470496177673, "step": 14676 }, { "epoch": 18.008588957055213, "grad_norm": 0.3195498585700989, "learning_rate": 1.3170838038638278e-06, "loss": 0.4939005970954895, "step": 14677 }, { "epoch": 18.009815950920245, "grad_norm": 0.24394716322422028, "learning_rate": 1.3154766514268975e-06, "loss": 0.6386977434158325, "step": 14678 }, { "epoch": 18.011042944785277, "grad_norm": 0.21713653206825256, "learning_rate": 1.3138704536455493e-06, "loss": 0.32999545335769653, "step": 14679 }, { "epoch": 18.012269938650306, "grad_norm": 0.25246524810791016, "learning_rate": 1.3122652105845319e-06, "loss": 0.6087346076965332, "step": 14680 }, { "epoch": 18.013496932515338, "grad_norm": 0.28877395391464233, "learning_rate": 1.3106609223085459e-06, "loss": 0.6901490688323975, "step": 14681 }, { "epoch": 18.01472392638037, "grad_norm": 0.289163738489151, "learning_rate": 1.3090575888822509e-06, "loss": 0.7074642181396484, "step": 14682 }, { "epoch": 18.0159509202454, "grad_norm": 0.28363123536109924, "learning_rate": 1.307455210370276e-06, "loss": 0.6447563767433167, "step": 14683 }, { "epoch": 18.01717791411043, "grad_norm": 0.2657002806663513, "learning_rate": 1.305853786837208e-06, "loss": 0.5514336824417114, "step": 14684 }, { "epoch": 18.01840490797546, "grad_norm": 0.2831283509731293, "learning_rate": 1.3042533183475952e-06, "loss": 0.8119156956672668, "step": 14685 }, { "epoch": 18.01963190184049, "grad_norm": 0.2486710399389267, "learning_rate": 1.3026538049659532e-06, "loss": 0.7142343521118164, "step": 14686 }, { "epoch": 18.020858895705523, "grad_norm": 0.24357111752033234, "learning_rate": 1.3010552467567466e-06, "loss": 0.5224248766899109, "step": 14687 }, { "epoch": 18.02208588957055, "grad_norm": 0.24826917052268982, "learning_rate": 1.2994576437844103e-06, "loss": 0.5186973810195923, "step": 14688 }, { "epoch": 18.023312883435583, "grad_norm": 0.27868419885635376, "learning_rate": 1.2978609961133399e-06, "loss": 0.742059051990509, "step": 14689 }, { "epoch": 18.024539877300615, "grad_norm": 0.24074698984622955, "learning_rate": 1.296265303807892e-06, "loss": 0.6613558530807495, "step": 14690 }, { "epoch": 18.025766871165644, "grad_norm": 0.24142009019851685, "learning_rate": 1.2946705669323878e-06, "loss": 0.594866931438446, "step": 14691 }, { "epoch": 18.026993865030676, "grad_norm": 0.24020954966545105, "learning_rate": 1.2930767855511005e-06, "loss": 0.5475742220878601, "step": 14692 }, { "epoch": 18.028220858895704, "grad_norm": 0.2685195207595825, "learning_rate": 1.2914839597282736e-06, "loss": 0.6200052499771118, "step": 14693 }, { "epoch": 18.029447852760736, "grad_norm": 0.26735803484916687, "learning_rate": 1.2898920895281108e-06, "loss": 0.5353715419769287, "step": 14694 }, { "epoch": 18.030674846625768, "grad_norm": 0.2810949385166168, "learning_rate": 1.288301175014775e-06, "loss": 0.4903876483440399, "step": 14695 }, { "epoch": 18.031901840490796, "grad_norm": 0.2875228822231293, "learning_rate": 1.2867112162523897e-06, "loss": 0.7412351369857788, "step": 14696 }, { "epoch": 18.03312883435583, "grad_norm": 0.22166800498962402, "learning_rate": 1.2851222133050456e-06, "loss": 0.5275256037712097, "step": 14697 }, { "epoch": 18.03435582822086, "grad_norm": 0.2264336496591568, "learning_rate": 1.283534166236783e-06, "loss": 0.3994319438934326, "step": 14698 }, { "epoch": 18.03558282208589, "grad_norm": 0.27725744247436523, "learning_rate": 1.2819470751116196e-06, "loss": 0.45187127590179443, "step": 14699 }, { "epoch": 18.03680981595092, "grad_norm": 0.26830852031707764, "learning_rate": 1.2803609399935274e-06, "loss": 0.6691170334815979, "step": 14700 }, { "epoch": 18.03803680981595, "grad_norm": 0.2619655132293701, "learning_rate": 1.2787757609464296e-06, "loss": 0.22049419581890106, "step": 14701 }, { "epoch": 18.03926380368098, "grad_norm": 0.2851826250553131, "learning_rate": 1.277191538034228e-06, "loss": 0.6860067248344421, "step": 14702 }, { "epoch": 18.040490797546013, "grad_norm": 0.2619082033634186, "learning_rate": 1.2756082713207745e-06, "loss": 0.5653128623962402, "step": 14703 }, { "epoch": 18.041717791411042, "grad_norm": 0.3042795658111572, "learning_rate": 1.27402596086989e-06, "loss": 0.6747358441352844, "step": 14704 }, { "epoch": 18.042944785276074, "grad_norm": 0.2690955102443695, "learning_rate": 1.2724446067453489e-06, "loss": 0.8403098583221436, "step": 14705 }, { "epoch": 18.044171779141106, "grad_norm": 0.2661285102367401, "learning_rate": 1.2708642090108942e-06, "loss": 0.6148532629013062, "step": 14706 }, { "epoch": 18.045398773006134, "grad_norm": 0.2536851465702057, "learning_rate": 1.269284767730225e-06, "loss": 0.5828821063041687, "step": 14707 }, { "epoch": 18.046625766871166, "grad_norm": 0.27755022048950195, "learning_rate": 1.2677062829670017e-06, "loss": 0.7345338463783264, "step": 14708 }, { "epoch": 18.047852760736195, "grad_norm": 0.26204586029052734, "learning_rate": 1.266128754784854e-06, "loss": 0.6025913953781128, "step": 14709 }, { "epoch": 18.049079754601227, "grad_norm": 0.26670968532562256, "learning_rate": 1.264552183247364e-06, "loss": 0.5462385416030884, "step": 14710 }, { "epoch": 18.05030674846626, "grad_norm": 0.25775453448295593, "learning_rate": 1.262976568418084e-06, "loss": 0.5145770907402039, "step": 14711 }, { "epoch": 18.051533742331287, "grad_norm": 0.292965292930603, "learning_rate": 1.2614019103605102e-06, "loss": 0.49756181240081787, "step": 14712 }, { "epoch": 18.05276073619632, "grad_norm": 0.26325327157974243, "learning_rate": 1.2598282091381252e-06, "loss": 0.6994046568870544, "step": 14713 }, { "epoch": 18.05398773006135, "grad_norm": 0.25802725553512573, "learning_rate": 1.2582554648143562e-06, "loss": 0.4465128183364868, "step": 14714 }, { "epoch": 18.05521472392638, "grad_norm": 0.2557304799556732, "learning_rate": 1.2566836774525997e-06, "loss": 0.504986584186554, "step": 14715 }, { "epoch": 18.05644171779141, "grad_norm": 0.2756461203098297, "learning_rate": 1.255112847116202e-06, "loss": 0.659092903137207, "step": 14716 }, { "epoch": 18.05766871165644, "grad_norm": 0.32308894395828247, "learning_rate": 1.2535429738684822e-06, "loss": 0.6876434087753296, "step": 14717 }, { "epoch": 18.058895705521472, "grad_norm": 0.24270571768283844, "learning_rate": 1.2519740577727173e-06, "loss": 0.5285595655441284, "step": 14718 }, { "epoch": 18.060122699386504, "grad_norm": 0.2521442174911499, "learning_rate": 1.2504060988921485e-06, "loss": 0.6927993297576904, "step": 14719 }, { "epoch": 18.061349693251532, "grad_norm": 0.2553957998752594, "learning_rate": 1.2488390972899778e-06, "loss": 0.5332204103469849, "step": 14720 }, { "epoch": 18.062576687116565, "grad_norm": 0.2955595850944519, "learning_rate": 1.2472730530293574e-06, "loss": 0.7910521030426025, "step": 14721 }, { "epoch": 18.063803680981597, "grad_norm": 0.24877744913101196, "learning_rate": 1.2457079661734178e-06, "loss": 0.6661491394042969, "step": 14722 }, { "epoch": 18.065030674846625, "grad_norm": 0.2780570983886719, "learning_rate": 1.2441438367852388e-06, "loss": 0.5603436827659607, "step": 14723 }, { "epoch": 18.066257668711657, "grad_norm": 0.3148570656776428, "learning_rate": 1.2425806649278671e-06, "loss": 0.6911613941192627, "step": 14724 }, { "epoch": 18.067484662576685, "grad_norm": 0.26755058765411377, "learning_rate": 1.2410184506643164e-06, "loss": 0.7695028781890869, "step": 14725 }, { "epoch": 18.068711656441717, "grad_norm": 0.241511270403862, "learning_rate": 1.2394571940575446e-06, "loss": 0.5810757875442505, "step": 14726 }, { "epoch": 18.06993865030675, "grad_norm": 0.256823867559433, "learning_rate": 1.2378968951704845e-06, "loss": 0.44509875774383545, "step": 14727 }, { "epoch": 18.071165644171778, "grad_norm": 0.25903940200805664, "learning_rate": 1.2363375540660304e-06, "loss": 0.6648673415184021, "step": 14728 }, { "epoch": 18.07239263803681, "grad_norm": 0.26043501496315, "learning_rate": 1.234779170807035e-06, "loss": 0.5573819875717163, "step": 14729 }, { "epoch": 18.073619631901842, "grad_norm": 0.26453620195388794, "learning_rate": 1.2332217454563116e-06, "loss": 0.47202008962631226, "step": 14730 }, { "epoch": 18.07484662576687, "grad_norm": 0.30493998527526855, "learning_rate": 1.2316652780766324e-06, "loss": 0.4513840079307556, "step": 14731 }, { "epoch": 18.076073619631902, "grad_norm": 0.25202369689941406, "learning_rate": 1.2301097687307361e-06, "loss": 0.6360188722610474, "step": 14732 }, { "epoch": 18.07730061349693, "grad_norm": 0.2519301474094391, "learning_rate": 1.2285552174813225e-06, "loss": 0.37584954500198364, "step": 14733 }, { "epoch": 18.078527607361963, "grad_norm": 0.2923086881637573, "learning_rate": 1.227001624391047e-06, "loss": 0.5420209169387817, "step": 14734 }, { "epoch": 18.079754601226995, "grad_norm": 0.25460705161094666, "learning_rate": 1.2254489895225347e-06, "loss": 0.6598200798034668, "step": 14735 }, { "epoch": 18.080981595092023, "grad_norm": 0.24892649054527283, "learning_rate": 1.2238973129383685e-06, "loss": 0.6341156959533691, "step": 14736 }, { "epoch": 18.082208588957055, "grad_norm": 0.2726600170135498, "learning_rate": 1.2223465947010877e-06, "loss": 0.6655759811401367, "step": 14737 }, { "epoch": 18.083435582822087, "grad_norm": 0.26307088136672974, "learning_rate": 1.2207968348732e-06, "loss": 0.5710554122924805, "step": 14738 }, { "epoch": 18.084662576687116, "grad_norm": 0.2601437270641327, "learning_rate": 1.2192480335171758e-06, "loss": 0.5255711078643799, "step": 14739 }, { "epoch": 18.085889570552148, "grad_norm": 0.24033832550048828, "learning_rate": 1.2177001906954338e-06, "loss": 0.496280699968338, "step": 14740 }, { "epoch": 18.08711656441718, "grad_norm": 0.29315364360809326, "learning_rate": 1.216153306470369e-06, "loss": 0.6240687370300293, "step": 14741 }, { "epoch": 18.088343558282208, "grad_norm": 0.23308129608631134, "learning_rate": 1.2146073809043312e-06, "loss": 0.470639705657959, "step": 14742 }, { "epoch": 18.08957055214724, "grad_norm": 0.2676543593406677, "learning_rate": 1.2130624140596291e-06, "loss": 0.4614896774291992, "step": 14743 }, { "epoch": 18.09079754601227, "grad_norm": 0.28502893447875977, "learning_rate": 1.2115184059985463e-06, "loss": 0.6204589605331421, "step": 14744 }, { "epoch": 18.0920245398773, "grad_norm": 0.2745014727115631, "learning_rate": 1.2099753567833022e-06, "loss": 0.6177423000335693, "step": 14745 }, { "epoch": 18.093251533742333, "grad_norm": 0.266347199678421, "learning_rate": 1.2084332664761027e-06, "loss": 0.5986963510513306, "step": 14746 }, { "epoch": 18.09447852760736, "grad_norm": 0.297341525554657, "learning_rate": 1.206892135139101e-06, "loss": 0.6357883214950562, "step": 14747 }, { "epoch": 18.095705521472393, "grad_norm": 0.26656341552734375, "learning_rate": 1.2053519628344168e-06, "loss": 0.6391650438308716, "step": 14748 }, { "epoch": 18.096932515337425, "grad_norm": 0.3141368329524994, "learning_rate": 1.2038127496241364e-06, "loss": 0.7222796678543091, "step": 14749 }, { "epoch": 18.098159509202453, "grad_norm": 0.29429808259010315, "learning_rate": 1.2022744955702908e-06, "loss": 0.6423506140708923, "step": 14750 }, { "epoch": 18.099386503067485, "grad_norm": 0.31961479783058167, "learning_rate": 1.200737200734886e-06, "loss": 0.5356523394584656, "step": 14751 }, { "epoch": 18.100613496932514, "grad_norm": 0.27962884306907654, "learning_rate": 1.1992008651798892e-06, "loss": 0.6289323568344116, "step": 14752 }, { "epoch": 18.101840490797546, "grad_norm": 0.28391116857528687, "learning_rate": 1.19766548896722e-06, "loss": 0.7367600202560425, "step": 14753 }, { "epoch": 18.103067484662578, "grad_norm": 0.255130410194397, "learning_rate": 1.1961310721587737e-06, "loss": 0.6962535977363586, "step": 14754 }, { "epoch": 18.104294478527606, "grad_norm": 0.2768807113170624, "learning_rate": 1.1945976148163924e-06, "loss": 0.29734355211257935, "step": 14755 }, { "epoch": 18.10552147239264, "grad_norm": 0.2612367570400238, "learning_rate": 1.193065117001882e-06, "loss": 0.6541072726249695, "step": 14756 }, { "epoch": 18.10674846625767, "grad_norm": 0.2620174288749695, "learning_rate": 1.1915335787770155e-06, "loss": 0.34555530548095703, "step": 14757 }, { "epoch": 18.1079754601227, "grad_norm": 0.24455462396144867, "learning_rate": 1.1900030002035352e-06, "loss": 0.5935930013656616, "step": 14758 }, { "epoch": 18.10920245398773, "grad_norm": 0.2739318609237671, "learning_rate": 1.1884733813431193e-06, "loss": 0.6299898624420166, "step": 14759 }, { "epoch": 18.11042944785276, "grad_norm": 0.25147828459739685, "learning_rate": 1.1869447222574299e-06, "loss": 0.6016464829444885, "step": 14760 }, { "epoch": 18.11165644171779, "grad_norm": 0.3216879665851593, "learning_rate": 1.1854170230080813e-06, "loss": 0.5907288193702698, "step": 14761 }, { "epoch": 18.112883435582823, "grad_norm": 0.28898489475250244, "learning_rate": 1.1838902836566524e-06, "loss": 0.8385666608810425, "step": 14762 }, { "epoch": 18.11411042944785, "grad_norm": 0.2882368862628937, "learning_rate": 1.1823645042646852e-06, "loss": 0.7034389972686768, "step": 14763 }, { "epoch": 18.115337423312884, "grad_norm": 0.27602750062942505, "learning_rate": 1.1808396848936699e-06, "loss": 0.5460613369941711, "step": 14764 }, { "epoch": 18.116564417177916, "grad_norm": 0.27741363644599915, "learning_rate": 1.1793158256050708e-06, "loss": 0.6697900295257568, "step": 14765 }, { "epoch": 18.117791411042944, "grad_norm": 0.27683332562446594, "learning_rate": 1.1777929264603138e-06, "loss": 0.7239335775375366, "step": 14766 }, { "epoch": 18.119018404907976, "grad_norm": 0.2802925109863281, "learning_rate": 1.1762709875207806e-06, "loss": 0.5545239448547363, "step": 14767 }, { "epoch": 18.120245398773005, "grad_norm": 0.26545843482017517, "learning_rate": 1.174750008847819e-06, "loss": 0.6048265695571899, "step": 14768 }, { "epoch": 18.121472392638037, "grad_norm": 0.26717478036880493, "learning_rate": 1.1732299905027306e-06, "loss": 0.610572338104248, "step": 14769 }, { "epoch": 18.12269938650307, "grad_norm": 0.27197328209877014, "learning_rate": 1.1717109325467852e-06, "loss": 0.3873680531978607, "step": 14770 }, { "epoch": 18.123926380368097, "grad_norm": 0.24926748871803284, "learning_rate": 1.1701928350412117e-06, "loss": 0.5061421394348145, "step": 14771 }, { "epoch": 18.12515337423313, "grad_norm": 0.4047524034976959, "learning_rate": 1.1686756980472001e-06, "loss": 0.5245703458786011, "step": 14772 }, { "epoch": 18.12638036809816, "grad_norm": 0.24831652641296387, "learning_rate": 1.1671595216259047e-06, "loss": 0.6340184807777405, "step": 14773 }, { "epoch": 18.12760736196319, "grad_norm": 0.3011031448841095, "learning_rate": 1.1656443058384313e-06, "loss": 0.5802903175354004, "step": 14774 }, { "epoch": 18.12883435582822, "grad_norm": 0.25693610310554504, "learning_rate": 1.1641300507458597e-06, "loss": 0.48606646060943604, "step": 14775 }, { "epoch": 18.13006134969325, "grad_norm": 0.25373196601867676, "learning_rate": 1.1626167564092237e-06, "loss": 0.5785506367683411, "step": 14776 }, { "epoch": 18.131288343558282, "grad_norm": 0.25822749733924866, "learning_rate": 1.1611044228895224e-06, "loss": 0.660150408744812, "step": 14777 }, { "epoch": 18.132515337423314, "grad_norm": 0.3991084694862366, "learning_rate": 1.1595930502477121e-06, "loss": 0.5788047313690186, "step": 14778 }, { "epoch": 18.133742331288342, "grad_norm": 0.269821435213089, "learning_rate": 1.1580826385447024e-06, "loss": 0.5926434993743896, "step": 14779 }, { "epoch": 18.134969325153374, "grad_norm": 0.2682454586029053, "learning_rate": 1.1565731878413865e-06, "loss": 0.4119260609149933, "step": 14780 }, { "epoch": 18.136196319018406, "grad_norm": 0.2811979353427887, "learning_rate": 1.1550646981986047e-06, "loss": 0.6638317108154297, "step": 14781 }, { "epoch": 18.137423312883435, "grad_norm": 0.2945413589477539, "learning_rate": 1.153557169677158e-06, "loss": 0.8222766518592834, "step": 14782 }, { "epoch": 18.138650306748467, "grad_norm": 0.25061318278312683, "learning_rate": 1.1520506023378064e-06, "loss": 0.5434099435806274, "step": 14783 }, { "epoch": 18.139877300613495, "grad_norm": 0.29450666904449463, "learning_rate": 1.150544996241279e-06, "loss": 0.6502367258071899, "step": 14784 }, { "epoch": 18.141104294478527, "grad_norm": 0.26420921087265015, "learning_rate": 1.1490403514482607e-06, "loss": 0.5811017751693726, "step": 14785 }, { "epoch": 18.14233128834356, "grad_norm": 0.23802681267261505, "learning_rate": 1.1475366680194028e-06, "loss": 0.4077605605125427, "step": 14786 }, { "epoch": 18.143558282208588, "grad_norm": 0.28373652696609497, "learning_rate": 1.1460339460153152e-06, "loss": 0.6054940223693848, "step": 14787 }, { "epoch": 18.14478527607362, "grad_norm": 0.2780472934246063, "learning_rate": 1.1445321854965608e-06, "loss": 0.655227541923523, "step": 14788 }, { "epoch": 18.14601226993865, "grad_norm": 0.2564193606376648, "learning_rate": 1.1430313865236769e-06, "loss": 0.5690796375274658, "step": 14789 }, { "epoch": 18.14723926380368, "grad_norm": 0.29904335737228394, "learning_rate": 1.141531549157157e-06, "loss": 0.5125435590744019, "step": 14790 }, { "epoch": 18.148466257668712, "grad_norm": 0.27841755747795105, "learning_rate": 1.140032673457453e-06, "loss": 0.7519367933273315, "step": 14791 }, { "epoch": 18.14969325153374, "grad_norm": 0.24487988650798798, "learning_rate": 1.1385347594849854e-06, "loss": 0.6062588095664978, "step": 14792 }, { "epoch": 18.150920245398773, "grad_norm": 0.24204793572425842, "learning_rate": 1.1370378073001204e-06, "loss": 0.4515277147293091, "step": 14793 }, { "epoch": 18.152147239263805, "grad_norm": 0.28201863169670105, "learning_rate": 1.1355418169632038e-06, "loss": 0.4382992088794708, "step": 14794 }, { "epoch": 18.153374233128833, "grad_norm": 0.2509777843952179, "learning_rate": 1.1340467885345347e-06, "loss": 0.4443310499191284, "step": 14795 }, { "epoch": 18.154601226993865, "grad_norm": 0.28569814562797546, "learning_rate": 1.1325527220743703e-06, "loss": 0.5561038255691528, "step": 14796 }, { "epoch": 18.155828220858897, "grad_norm": 0.3020141124725342, "learning_rate": 1.131059617642935e-06, "loss": 0.7362170219421387, "step": 14797 }, { "epoch": 18.157055214723925, "grad_norm": 0.29274046421051025, "learning_rate": 1.129567475300408e-06, "loss": 0.5618748664855957, "step": 14798 }, { "epoch": 18.158282208588957, "grad_norm": 0.33272233605384827, "learning_rate": 1.1280762951069361e-06, "loss": 0.5052449703216553, "step": 14799 }, { "epoch": 18.15950920245399, "grad_norm": 0.3003717362880707, "learning_rate": 1.1265860771226238e-06, "loss": 0.7986615896224976, "step": 14800 }, { "epoch": 18.160736196319018, "grad_norm": 0.2630554735660553, "learning_rate": 1.1250968214075398e-06, "loss": 0.4393942356109619, "step": 14801 }, { "epoch": 18.16196319018405, "grad_norm": 0.26669883728027344, "learning_rate": 1.123608528021708e-06, "loss": 0.6282778382301331, "step": 14802 }, { "epoch": 18.16319018404908, "grad_norm": 0.24756713211536407, "learning_rate": 1.1221211970251166e-06, "loss": 0.6005612015724182, "step": 14803 }, { "epoch": 18.16441717791411, "grad_norm": 0.2812489867210388, "learning_rate": 1.1206348284777207e-06, "loss": 0.4900221824645996, "step": 14804 }, { "epoch": 18.165644171779142, "grad_norm": 0.28689664602279663, "learning_rate": 1.1191494224394277e-06, "loss": 0.6676018238067627, "step": 14805 }, { "epoch": 18.16687116564417, "grad_norm": 0.24836260080337524, "learning_rate": 1.117664978970112e-06, "loss": 0.707294225692749, "step": 14806 }, { "epoch": 18.168098159509203, "grad_norm": 0.27456435561180115, "learning_rate": 1.1161814981296064e-06, "loss": 0.5701701641082764, "step": 14807 }, { "epoch": 18.169325153374235, "grad_norm": 0.2573767602443695, "learning_rate": 1.1146989799777046e-06, "loss": 0.5654751062393188, "step": 14808 }, { "epoch": 18.170552147239263, "grad_norm": 0.3141152858734131, "learning_rate": 1.1132174245741646e-06, "loss": 0.5964987277984619, "step": 14809 }, { "epoch": 18.171779141104295, "grad_norm": 0.2930687963962555, "learning_rate": 1.1117368319787046e-06, "loss": 0.3554658889770508, "step": 14810 }, { "epoch": 18.173006134969324, "grad_norm": 0.27443426847457886, "learning_rate": 1.1102572022509972e-06, "loss": 0.6657389402389526, "step": 14811 }, { "epoch": 18.174233128834356, "grad_norm": 0.25543758273124695, "learning_rate": 1.1087785354506942e-06, "loss": 0.8102816343307495, "step": 14812 }, { "epoch": 18.175460122699388, "grad_norm": 0.25440800189971924, "learning_rate": 1.1073008316373812e-06, "loss": 0.6219803094863892, "step": 14813 }, { "epoch": 18.176687116564416, "grad_norm": 0.2546125650405884, "learning_rate": 1.1058240908706302e-06, "loss": 0.6112823486328125, "step": 14814 }, { "epoch": 18.177914110429448, "grad_norm": 0.24623389542102814, "learning_rate": 1.1043483132099602e-06, "loss": 0.4982820153236389, "step": 14815 }, { "epoch": 18.17914110429448, "grad_norm": 0.23903052508831024, "learning_rate": 1.102873498714857e-06, "loss": 0.4568873643875122, "step": 14816 }, { "epoch": 18.18036809815951, "grad_norm": 0.24269966781139374, "learning_rate": 1.101399647444773e-06, "loss": 0.47848859429359436, "step": 14817 }, { "epoch": 18.18159509202454, "grad_norm": 0.2939677834510803, "learning_rate": 1.0999267594591023e-06, "loss": 0.5587877035140991, "step": 14818 }, { "epoch": 18.18282208588957, "grad_norm": 0.285671204328537, "learning_rate": 1.098454834817217e-06, "loss": 0.5312268137931824, "step": 14819 }, { "epoch": 18.1840490797546, "grad_norm": 0.25661855936050415, "learning_rate": 1.09698387357845e-06, "loss": 0.4996369481086731, "step": 14820 }, { "epoch": 18.185276073619633, "grad_norm": 0.28520864248275757, "learning_rate": 1.09551387580209e-06, "loss": 0.6270918846130371, "step": 14821 }, { "epoch": 18.18650306748466, "grad_norm": 0.29174116253852844, "learning_rate": 1.0940448415473897e-06, "loss": 0.6711344718933105, "step": 14822 }, { "epoch": 18.187730061349694, "grad_norm": 0.2522742450237274, "learning_rate": 1.0925767708735545e-06, "loss": 0.48907315731048584, "step": 14823 }, { "epoch": 18.188957055214726, "grad_norm": 0.2279265969991684, "learning_rate": 1.0911096638397678e-06, "loss": 0.4598676860332489, "step": 14824 }, { "epoch": 18.190184049079754, "grad_norm": 0.2627038359642029, "learning_rate": 1.0896435205051597e-06, "loss": 0.4075767993927002, "step": 14825 }, { "epoch": 18.191411042944786, "grad_norm": 0.2452666014432907, "learning_rate": 1.0881783409288278e-06, "loss": 0.4360887408256531, "step": 14826 }, { "epoch": 18.192638036809814, "grad_norm": 0.2526918053627014, "learning_rate": 1.0867141251698271e-06, "loss": 0.5169374346733093, "step": 14827 }, { "epoch": 18.193865030674846, "grad_norm": 0.2482978254556656, "learning_rate": 1.0852508732871774e-06, "loss": 0.540313720703125, "step": 14828 }, { "epoch": 18.19509202453988, "grad_norm": 0.2726093530654907, "learning_rate": 1.0837885853398566e-06, "loss": 0.7479716539382935, "step": 14829 }, { "epoch": 18.196319018404907, "grad_norm": 0.265191912651062, "learning_rate": 1.0823272613868091e-06, "loss": 0.7461106777191162, "step": 14830 }, { "epoch": 18.19754601226994, "grad_norm": 0.2606388032436371, "learning_rate": 1.0808669014869349e-06, "loss": 0.5220180749893188, "step": 14831 }, { "epoch": 18.19877300613497, "grad_norm": 0.27313604950904846, "learning_rate": 1.0794075056990955e-06, "loss": 0.5687035322189331, "step": 14832 }, { "epoch": 18.2, "grad_norm": 0.2549886703491211, "learning_rate": 1.077949074082113e-06, "loss": 0.5669887065887451, "step": 14833 }, { "epoch": 18.20122699386503, "grad_norm": 0.2542164921760559, "learning_rate": 1.0764916066947794e-06, "loss": 0.6500406265258789, "step": 14834 }, { "epoch": 18.20245398773006, "grad_norm": 0.265130877494812, "learning_rate": 1.075035103595834e-06, "loss": 0.7309906482696533, "step": 14835 }, { "epoch": 18.20368098159509, "grad_norm": 0.27156496047973633, "learning_rate": 1.0735795648439906e-06, "loss": 0.500552773475647, "step": 14836 }, { "epoch": 18.204907975460124, "grad_norm": 0.24534833431243896, "learning_rate": 1.0721249904979136e-06, "loss": 0.6094156503677368, "step": 14837 }, { "epoch": 18.206134969325152, "grad_norm": 0.3039546012878418, "learning_rate": 1.0706713806162337e-06, "loss": 0.48269039392471313, "step": 14838 }, { "epoch": 18.207361963190184, "grad_norm": 0.268101304769516, "learning_rate": 1.0692187352575405e-06, "loss": 0.5692702531814575, "step": 14839 }, { "epoch": 18.208588957055216, "grad_norm": 0.27422213554382324, "learning_rate": 1.067767054480387e-06, "loss": 0.786743700504303, "step": 14840 }, { "epoch": 18.209815950920245, "grad_norm": 0.26179584860801697, "learning_rate": 1.06631633834329e-06, "loss": 0.48063573241233826, "step": 14841 }, { "epoch": 18.211042944785277, "grad_norm": 0.29538387060165405, "learning_rate": 1.0648665869047198e-06, "loss": 0.5721991062164307, "step": 14842 }, { "epoch": 18.212269938650305, "grad_norm": 0.29689571261405945, "learning_rate": 1.0634178002231098e-06, "loss": 0.563075602054596, "step": 14843 }, { "epoch": 18.213496932515337, "grad_norm": 0.24677696824073792, "learning_rate": 1.0619699783568583e-06, "loss": 0.6379353404045105, "step": 14844 }, { "epoch": 18.21472392638037, "grad_norm": 0.27494364976882935, "learning_rate": 1.0605231213643262e-06, "loss": 0.739030122756958, "step": 14845 }, { "epoch": 18.215950920245398, "grad_norm": 0.27808767557144165, "learning_rate": 1.0590772293038259e-06, "loss": 0.5933088064193726, "step": 14846 }, { "epoch": 18.21717791411043, "grad_norm": 0.27176904678344727, "learning_rate": 1.0576323022336438e-06, "loss": 0.6576582193374634, "step": 14847 }, { "epoch": 18.21840490797546, "grad_norm": 0.26141417026519775, "learning_rate": 1.0561883402120166e-06, "loss": 0.4209238290786743, "step": 14848 }, { "epoch": 18.21963190184049, "grad_norm": 0.25454291701316833, "learning_rate": 1.0547453432971505e-06, "loss": 0.5694785118103027, "step": 14849 }, { "epoch": 18.220858895705522, "grad_norm": 0.25230881571769714, "learning_rate": 1.0533033115472047e-06, "loss": 0.6020994186401367, "step": 14850 }, { "epoch": 18.22208588957055, "grad_norm": 0.28071144223213196, "learning_rate": 1.0518622450203019e-06, "loss": 0.6659685373306274, "step": 14851 }, { "epoch": 18.223312883435582, "grad_norm": 0.288470983505249, "learning_rate": 1.0504221437745321e-06, "loss": 0.627869725227356, "step": 14852 }, { "epoch": 18.224539877300614, "grad_norm": 0.27747583389282227, "learning_rate": 1.0489830078679374e-06, "loss": 0.5762244462966919, "step": 14853 }, { "epoch": 18.225766871165643, "grad_norm": 0.25105953216552734, "learning_rate": 1.0475448373585273e-06, "loss": 0.5180539488792419, "step": 14854 }, { "epoch": 18.226993865030675, "grad_norm": 0.27241671085357666, "learning_rate": 1.0461076323042745e-06, "loss": 0.44019559025764465, "step": 14855 }, { "epoch": 18.228220858895707, "grad_norm": 0.2730450928211212, "learning_rate": 1.0446713927630997e-06, "loss": 0.7107751369476318, "step": 14856 }, { "epoch": 18.229447852760735, "grad_norm": 0.24653923511505127, "learning_rate": 1.0432361187929007e-06, "loss": 0.606518030166626, "step": 14857 }, { "epoch": 18.230674846625767, "grad_norm": 0.31581035256385803, "learning_rate": 1.0418018104515259e-06, "loss": 0.668636679649353, "step": 14858 }, { "epoch": 18.2319018404908, "grad_norm": 0.24100524187088013, "learning_rate": 1.0403684677967873e-06, "loss": 0.4480138421058655, "step": 14859 }, { "epoch": 18.233128834355828, "grad_norm": 0.2734662890434265, "learning_rate": 1.0389360908864665e-06, "loss": 0.5586706399917603, "step": 14860 }, { "epoch": 18.23435582822086, "grad_norm": 0.2974356412887573, "learning_rate": 1.0375046797782866e-06, "loss": 0.5544067025184631, "step": 14861 }, { "epoch": 18.235582822085888, "grad_norm": 0.2644416093826294, "learning_rate": 1.0360742345299517e-06, "loss": 0.5195169448852539, "step": 14862 }, { "epoch": 18.23680981595092, "grad_norm": 0.26911020278930664, "learning_rate": 1.0346447551991184e-06, "loss": 0.6369551420211792, "step": 14863 }, { "epoch": 18.238036809815952, "grad_norm": 0.25652408599853516, "learning_rate": 1.0332162418434016e-06, "loss": 0.41512924432754517, "step": 14864 }, { "epoch": 18.23926380368098, "grad_norm": 0.24807386100292206, "learning_rate": 1.0317886945203858e-06, "loss": 0.5505387783050537, "step": 14865 }, { "epoch": 18.240490797546013, "grad_norm": 0.27318164706230164, "learning_rate": 1.0303621132876084e-06, "loss": 0.5980280637741089, "step": 14866 }, { "epoch": 18.241717791411045, "grad_norm": 0.2699911296367645, "learning_rate": 1.0289364982025678e-06, "loss": 0.7036045789718628, "step": 14867 }, { "epoch": 18.242944785276073, "grad_norm": 0.28117480874061584, "learning_rate": 1.0275118493227265e-06, "loss": 0.4618714451789856, "step": 14868 }, { "epoch": 18.244171779141105, "grad_norm": 0.2638676166534424, "learning_rate": 1.026088166705516e-06, "loss": 0.4010728597640991, "step": 14869 }, { "epoch": 18.245398773006134, "grad_norm": 0.21647456288337708, "learning_rate": 1.0246654504083158e-06, "loss": 0.38217219710350037, "step": 14870 }, { "epoch": 18.246625766871166, "grad_norm": 0.28368106484413147, "learning_rate": 1.0232437004884688e-06, "loss": 0.7266005873680115, "step": 14871 }, { "epoch": 18.247852760736198, "grad_norm": 0.2680337429046631, "learning_rate": 1.0218229170032845e-06, "loss": 0.6680964827537537, "step": 14872 }, { "epoch": 18.249079754601226, "grad_norm": 0.2281462699174881, "learning_rate": 1.0204031000100312e-06, "loss": 0.6115928888320923, "step": 14873 }, { "epoch": 18.250306748466258, "grad_norm": 0.2603338956832886, "learning_rate": 1.0189842495659407e-06, "loss": 0.5300279855728149, "step": 14874 }, { "epoch": 18.25153374233129, "grad_norm": 0.2658950984477997, "learning_rate": 1.0175663657281954e-06, "loss": 0.6190230846405029, "step": 14875 }, { "epoch": 18.25276073619632, "grad_norm": 0.2600836157798767, "learning_rate": 1.016149448553949e-06, "loss": 0.7614256143569946, "step": 14876 }, { "epoch": 18.25398773006135, "grad_norm": 0.26288658380508423, "learning_rate": 1.0147334981003177e-06, "loss": 0.651297926902771, "step": 14877 }, { "epoch": 18.25521472392638, "grad_norm": 0.283282995223999, "learning_rate": 1.0133185144243662e-06, "loss": 0.5358506441116333, "step": 14878 }, { "epoch": 18.25644171779141, "grad_norm": 0.2618766129016876, "learning_rate": 1.0119044975831383e-06, "loss": 0.4243462085723877, "step": 14879 }, { "epoch": 18.257668711656443, "grad_norm": 0.24442726373672485, "learning_rate": 1.0104914476336214e-06, "loss": 0.6007345914840698, "step": 14880 }, { "epoch": 18.25889570552147, "grad_norm": 0.27434179186820984, "learning_rate": 1.0090793646327756e-06, "loss": 0.558912992477417, "step": 14881 }, { "epoch": 18.260122699386503, "grad_norm": 0.28145796060562134, "learning_rate": 1.0076682486375138e-06, "loss": 0.6267221570014954, "step": 14882 }, { "epoch": 18.261349693251535, "grad_norm": 0.28936028480529785, "learning_rate": 1.0062580997047155e-06, "loss": 0.5278307795524597, "step": 14883 }, { "epoch": 18.262576687116564, "grad_norm": 0.2782147228717804, "learning_rate": 1.0048489178912269e-06, "loss": 0.6753562688827515, "step": 14884 }, { "epoch": 18.263803680981596, "grad_norm": 0.2909688651561737, "learning_rate": 1.0034407032538384e-06, "loss": 0.548197865486145, "step": 14885 }, { "epoch": 18.265030674846624, "grad_norm": 0.2500540018081665, "learning_rate": 1.0020334558493134e-06, "loss": 0.5557026863098145, "step": 14886 }, { "epoch": 18.266257668711656, "grad_norm": 0.25284066796302795, "learning_rate": 1.0006271757343782e-06, "loss": 0.44580674171447754, "step": 14887 }, { "epoch": 18.26748466257669, "grad_norm": 0.2701283395290375, "learning_rate": 9.9922186296571e-07, "loss": 0.5255430936813354, "step": 14888 }, { "epoch": 18.268711656441717, "grad_norm": 0.25274714827537537, "learning_rate": 9.97817517599961e-07, "loss": 0.5965268611907959, "step": 14889 }, { "epoch": 18.26993865030675, "grad_norm": 0.24520404636859894, "learning_rate": 9.964141396937243e-07, "loss": 0.47859692573547363, "step": 14890 }, { "epoch": 18.27116564417178, "grad_norm": 0.24645228683948517, "learning_rate": 9.950117293035771e-07, "loss": 0.45457926392555237, "step": 14891 }, { "epoch": 18.27239263803681, "grad_norm": 0.2678927183151245, "learning_rate": 9.936102864860435e-07, "loss": 0.5717335343360901, "step": 14892 }, { "epoch": 18.27361963190184, "grad_norm": 0.2451895922422409, "learning_rate": 9.92209811297612e-07, "loss": 0.628893256187439, "step": 14893 }, { "epoch": 18.27484662576687, "grad_norm": 0.27695536613464355, "learning_rate": 9.908103037947286e-07, "loss": 0.7185591459274292, "step": 14894 }, { "epoch": 18.2760736196319, "grad_norm": 0.23462247848510742, "learning_rate": 9.89411764033807e-07, "loss": 0.5936027765274048, "step": 14895 }, { "epoch": 18.277300613496934, "grad_norm": 0.27535203099250793, "learning_rate": 9.880141920712155e-07, "loss": 0.6924750804901123, "step": 14896 }, { "epoch": 18.278527607361962, "grad_norm": 0.25745025277137756, "learning_rate": 9.866175879632871e-07, "loss": 0.6263197660446167, "step": 14897 }, { "epoch": 18.279754601226994, "grad_norm": 0.2636398375034332, "learning_rate": 9.852219517663186e-07, "loss": 0.7402126789093018, "step": 14898 }, { "epoch": 18.280981595092026, "grad_norm": 0.25433310866355896, "learning_rate": 9.838272835365564e-07, "loss": 0.5887166261672974, "step": 14899 }, { "epoch": 18.282208588957054, "grad_norm": 0.25136280059814453, "learning_rate": 9.824335833302222e-07, "loss": 0.5705921649932861, "step": 14900 }, { "epoch": 18.283435582822086, "grad_norm": 0.2858131229877472, "learning_rate": 9.810408512034908e-07, "loss": 0.5064578652381897, "step": 14901 }, { "epoch": 18.284662576687115, "grad_norm": 0.2772005498409271, "learning_rate": 9.796490872124975e-07, "loss": 0.7065432071685791, "step": 14902 }, { "epoch": 18.285889570552147, "grad_norm": 0.26755502820014954, "learning_rate": 9.78258291413345e-07, "loss": 0.6741496324539185, "step": 14903 }, { "epoch": 18.28711656441718, "grad_norm": 0.25398534536361694, "learning_rate": 9.768684638620824e-07, "loss": 0.6294674873352051, "step": 14904 }, { "epoch": 18.288343558282207, "grad_norm": 0.2427646517753601, "learning_rate": 9.754796046147402e-07, "loss": 0.6190599799156189, "step": 14905 }, { "epoch": 18.28957055214724, "grad_norm": 0.21861590445041656, "learning_rate": 9.740917137272932e-07, "loss": 0.469300240278244, "step": 14906 }, { "epoch": 18.29079754601227, "grad_norm": 0.27587732672691345, "learning_rate": 9.727047912556853e-07, "loss": 0.5593298077583313, "step": 14907 }, { "epoch": 18.2920245398773, "grad_norm": 0.2588701546192169, "learning_rate": 9.71318837255822e-07, "loss": 0.5040408968925476, "step": 14908 }, { "epoch": 18.293251533742332, "grad_norm": 0.2517450749874115, "learning_rate": 9.69933851783561e-07, "loss": 0.5501172542572021, "step": 14909 }, { "epoch": 18.29447852760736, "grad_norm": 0.2505769729614258, "learning_rate": 9.685498348947303e-07, "loss": 0.6196407079696655, "step": 14910 }, { "epoch": 18.295705521472392, "grad_norm": 0.22502915561199188, "learning_rate": 9.671667866451156e-07, "loss": 0.3413742184638977, "step": 14911 }, { "epoch": 18.296932515337424, "grad_norm": 0.2648622989654541, "learning_rate": 9.65784707090467e-07, "loss": 0.6112321615219116, "step": 14912 }, { "epoch": 18.298159509202453, "grad_norm": 0.28468289971351624, "learning_rate": 9.644035962864866e-07, "loss": 0.6045237183570862, "step": 14913 }, { "epoch": 18.299386503067485, "grad_norm": 0.27909916639328003, "learning_rate": 9.630234542888472e-07, "loss": 0.5847504138946533, "step": 14914 }, { "epoch": 18.300613496932517, "grad_norm": 0.28523364663124084, "learning_rate": 9.616442811531735e-07, "loss": 0.46721869707107544, "step": 14915 }, { "epoch": 18.301840490797545, "grad_norm": 0.2266593724489212, "learning_rate": 9.602660769350624e-07, "loss": 0.5114679336547852, "step": 14916 }, { "epoch": 18.303067484662577, "grad_norm": 0.29576271772384644, "learning_rate": 9.588888416900616e-07, "loss": 0.44320952892303467, "step": 14917 }, { "epoch": 18.30429447852761, "grad_norm": 0.2592233419418335, "learning_rate": 9.575125754736874e-07, "loss": 0.4973076581954956, "step": 14918 }, { "epoch": 18.305521472392638, "grad_norm": 0.2702350914478302, "learning_rate": 9.56137278341407e-07, "loss": 0.44164231419563293, "step": 14919 }, { "epoch": 18.30674846625767, "grad_norm": 0.30028244853019714, "learning_rate": 9.547629503486561e-07, "loss": 0.42793703079223633, "step": 14920 }, { "epoch": 18.307975460122698, "grad_norm": 0.26055872440338135, "learning_rate": 9.533895915508323e-07, "loss": 0.5761421918869019, "step": 14921 }, { "epoch": 18.30920245398773, "grad_norm": 0.2877582013607025, "learning_rate": 9.520172020032913e-07, "loss": 0.6294504404067993, "step": 14922 }, { "epoch": 18.310429447852762, "grad_norm": 0.2876107692718506, "learning_rate": 9.506457817613529e-07, "loss": 0.5706079006195068, "step": 14923 }, { "epoch": 18.31165644171779, "grad_norm": 0.19851307570934296, "learning_rate": 9.492753308802893e-07, "loss": 0.16998180747032166, "step": 14924 }, { "epoch": 18.312883435582823, "grad_norm": 0.26523008942604065, "learning_rate": 9.479058494153425e-07, "loss": 0.7710438370704651, "step": 14925 }, { "epoch": 18.314110429447855, "grad_norm": 0.29215750098228455, "learning_rate": 9.465373374217102e-07, "loss": 0.5719816088676453, "step": 14926 }, { "epoch": 18.315337423312883, "grad_norm": 0.2619645595550537, "learning_rate": 9.45169794954559e-07, "loss": 0.48013782501220703, "step": 14927 }, { "epoch": 18.316564417177915, "grad_norm": 0.25323647260665894, "learning_rate": 9.438032220690063e-07, "loss": 0.613540768623352, "step": 14928 }, { "epoch": 18.317791411042943, "grad_norm": 0.2778703570365906, "learning_rate": 9.424376188201328e-07, "loss": 0.7153140306472778, "step": 14929 }, { "epoch": 18.319018404907975, "grad_norm": 0.27868151664733887, "learning_rate": 9.410729852629863e-07, "loss": 0.5897961854934692, "step": 14930 }, { "epoch": 18.320245398773007, "grad_norm": 0.24093686044216156, "learning_rate": 9.397093214525698e-07, "loss": 0.3692380487918854, "step": 14931 }, { "epoch": 18.321472392638036, "grad_norm": 0.3463858962059021, "learning_rate": 9.383466274438452e-07, "loss": 0.8518167734146118, "step": 14932 }, { "epoch": 18.322699386503068, "grad_norm": 0.23572686314582825, "learning_rate": 9.369849032917488e-07, "loss": 0.49407288432121277, "step": 14933 }, { "epoch": 18.3239263803681, "grad_norm": 0.2811199724674225, "learning_rate": 9.356241490511564e-07, "loss": 0.6770088076591492, "step": 14934 }, { "epoch": 18.32515337423313, "grad_norm": 0.2873893678188324, "learning_rate": 9.342643647769183e-07, "loss": 0.6534485220909119, "step": 14935 }, { "epoch": 18.32638036809816, "grad_norm": 0.24797888100147247, "learning_rate": 9.329055505238466e-07, "loss": 0.37418586015701294, "step": 14936 }, { "epoch": 18.32760736196319, "grad_norm": 0.277547150850296, "learning_rate": 9.315477063467165e-07, "loss": 0.5396980047225952, "step": 14937 }, { "epoch": 18.32883435582822, "grad_norm": 0.2659480571746826, "learning_rate": 9.301908323002484e-07, "loss": 0.7431876063346863, "step": 14938 }, { "epoch": 18.330061349693253, "grad_norm": 0.2782931625843048, "learning_rate": 9.288349284391401e-07, "loss": 0.6341907978057861, "step": 14939 }, { "epoch": 18.33128834355828, "grad_norm": 0.2460147887468338, "learning_rate": 9.274799948180424e-07, "loss": 0.5243320465087891, "step": 14940 }, { "epoch": 18.332515337423313, "grad_norm": 0.29538068175315857, "learning_rate": 9.261260314915699e-07, "loss": 0.7207489013671875, "step": 14941 }, { "epoch": 18.333742331288345, "grad_norm": 0.28071385622024536, "learning_rate": 9.247730385142955e-07, "loss": 0.4540262818336487, "step": 14942 }, { "epoch": 18.334969325153374, "grad_norm": 0.23607057332992554, "learning_rate": 9.234210159407563e-07, "loss": 0.4125019907951355, "step": 14943 }, { "epoch": 18.336196319018406, "grad_norm": 0.2712760865688324, "learning_rate": 9.220699638254449e-07, "loss": 0.6386152505874634, "step": 14944 }, { "epoch": 18.337423312883434, "grad_norm": 0.2476544976234436, "learning_rate": 9.20719882222823e-07, "loss": 0.5701817870140076, "step": 14945 }, { "epoch": 18.338650306748466, "grad_norm": 0.2878611981868744, "learning_rate": 9.193707711873057e-07, "loss": 0.7396513223648071, "step": 14946 }, { "epoch": 18.339877300613498, "grad_norm": 0.2748095393180847, "learning_rate": 9.180226307732742e-07, "loss": 0.7712901830673218, "step": 14947 }, { "epoch": 18.341104294478527, "grad_norm": 0.31877246499061584, "learning_rate": 9.166754610350631e-07, "loss": 0.38149493932724, "step": 14948 }, { "epoch": 18.34233128834356, "grad_norm": 0.29055070877075195, "learning_rate": 9.153292620269787e-07, "loss": 0.7069321870803833, "step": 14949 }, { "epoch": 18.34355828220859, "grad_norm": 0.2262503206729889, "learning_rate": 9.139840338032779e-07, "loss": 0.3048746585845947, "step": 14950 }, { "epoch": 18.34478527607362, "grad_norm": 0.27305489778518677, "learning_rate": 9.126397764181866e-07, "loss": 0.5030574798583984, "step": 14951 }, { "epoch": 18.34601226993865, "grad_norm": 0.24597647786140442, "learning_rate": 9.112964899258891e-07, "loss": 0.5498008728027344, "step": 14952 }, { "epoch": 18.34723926380368, "grad_norm": 0.2601284086704254, "learning_rate": 9.099541743805257e-07, "loss": 0.5760515332221985, "step": 14953 }, { "epoch": 18.34846625766871, "grad_norm": 0.27047309279441833, "learning_rate": 9.086128298362001e-07, "loss": 0.3212139308452606, "step": 14954 }, { "epoch": 18.349693251533743, "grad_norm": 0.302736759185791, "learning_rate": 9.072724563469831e-07, "loss": 0.8365401029586792, "step": 14955 }, { "epoch": 18.350920245398772, "grad_norm": 0.28552481532096863, "learning_rate": 9.059330539668953e-07, "loss": 0.6756923794746399, "step": 14956 }, { "epoch": 18.352147239263804, "grad_norm": 0.25062835216522217, "learning_rate": 9.045946227499297e-07, "loss": 0.5296375751495361, "step": 14957 }, { "epoch": 18.353374233128836, "grad_norm": 0.30365926027297974, "learning_rate": 9.032571627500319e-07, "loss": 0.7329384088516235, "step": 14958 }, { "epoch": 18.354601226993864, "grad_norm": 0.23207053542137146, "learning_rate": 9.019206740211144e-07, "loss": 0.4031626880168915, "step": 14959 }, { "epoch": 18.355828220858896, "grad_norm": 0.28662484884262085, "learning_rate": 9.005851566170426e-07, "loss": 0.6922318339347839, "step": 14960 }, { "epoch": 18.357055214723925, "grad_norm": 0.24328899383544922, "learning_rate": 8.992506105916509e-07, "loss": 0.6497803926467896, "step": 14961 }, { "epoch": 18.358282208588957, "grad_norm": 0.2737060785293579, "learning_rate": 8.979170359987299e-07, "loss": 0.4869771897792816, "step": 14962 }, { "epoch": 18.35950920245399, "grad_norm": 0.22458438575267792, "learning_rate": 8.965844328920281e-07, "loss": 0.31408435106277466, "step": 14963 }, { "epoch": 18.360736196319017, "grad_norm": 0.30391135811805725, "learning_rate": 8.952528013252665e-07, "loss": 0.6306331753730774, "step": 14964 }, { "epoch": 18.36196319018405, "grad_norm": 0.28336378931999207, "learning_rate": 8.939221413521132e-07, "loss": 0.5200832486152649, "step": 14965 }, { "epoch": 18.36319018404908, "grad_norm": 0.26279789209365845, "learning_rate": 8.925924530262087e-07, "loss": 0.6349048614501953, "step": 14966 }, { "epoch": 18.36441717791411, "grad_norm": 0.25689712166786194, "learning_rate": 8.912637364011434e-07, "loss": 0.6326661705970764, "step": 14967 }, { "epoch": 18.36564417177914, "grad_norm": 0.3032512664794922, "learning_rate": 8.899359915304772e-07, "loss": 0.6846915483474731, "step": 14968 }, { "epoch": 18.36687116564417, "grad_norm": 0.2609926462173462, "learning_rate": 8.886092184677258e-07, "loss": 0.7377516031265259, "step": 14969 }, { "epoch": 18.368098159509202, "grad_norm": 0.2782549560070038, "learning_rate": 8.872834172663714e-07, "loss": 0.5318039655685425, "step": 14970 }, { "epoch": 18.369325153374234, "grad_norm": 0.2657947838306427, "learning_rate": 8.859585879798515e-07, "loss": 0.781874418258667, "step": 14971 }, { "epoch": 18.370552147239263, "grad_norm": 0.24260665476322174, "learning_rate": 8.846347306615626e-07, "loss": 0.6092846393585205, "step": 14972 }, { "epoch": 18.371779141104295, "grad_norm": 0.2657587230205536, "learning_rate": 8.833118453648703e-07, "loss": 0.7326947450637817, "step": 14973 }, { "epoch": 18.373006134969327, "grad_norm": 0.27869996428489685, "learning_rate": 8.81989932143093e-07, "loss": 0.5424805879592896, "step": 14974 }, { "epoch": 18.374233128834355, "grad_norm": 0.291361004114151, "learning_rate": 8.806689910495158e-07, "loss": 0.5088820457458496, "step": 14975 }, { "epoch": 18.375460122699387, "grad_norm": 0.24230031669139862, "learning_rate": 8.793490221373851e-07, "loss": 0.487094521522522, "step": 14976 }, { "epoch": 18.376687116564415, "grad_norm": 0.26783737540245056, "learning_rate": 8.780300254598944e-07, "loss": 0.7180638313293457, "step": 14977 }, { "epoch": 18.377914110429447, "grad_norm": 0.3031861186027527, "learning_rate": 8.767120010702179e-07, "loss": 0.6006426215171814, "step": 14978 }, { "epoch": 18.37914110429448, "grad_norm": 0.26976341009140015, "learning_rate": 8.753949490214768e-07, "loss": 0.6655715107917786, "step": 14979 }, { "epoch": 18.380368098159508, "grad_norm": 0.26862215995788574, "learning_rate": 8.740788693667651e-07, "loss": 0.6731605529785156, "step": 14980 }, { "epoch": 18.38159509202454, "grad_norm": 0.26043933629989624, "learning_rate": 8.727637621591206e-07, "loss": 0.6125415563583374, "step": 14981 }, { "epoch": 18.382822085889572, "grad_norm": 0.2744832932949066, "learning_rate": 8.714496274515566e-07, "loss": 0.6705185174942017, "step": 14982 }, { "epoch": 18.3840490797546, "grad_norm": 0.2835293412208557, "learning_rate": 8.701364652970417e-07, "loss": 0.6601405143737793, "step": 14983 }, { "epoch": 18.385276073619632, "grad_norm": 0.25995269417762756, "learning_rate": 8.688242757485032e-07, "loss": 0.6478754281997681, "step": 14984 }, { "epoch": 18.38650306748466, "grad_norm": 0.26614174246788025, "learning_rate": 8.675130588588404e-07, "loss": 0.5839325189590454, "step": 14985 }, { "epoch": 18.387730061349693, "grad_norm": 0.2758522927761078, "learning_rate": 8.662028146808942e-07, "loss": 0.5387250781059265, "step": 14986 }, { "epoch": 18.388957055214725, "grad_norm": 0.25011372566223145, "learning_rate": 8.648935432674781e-07, "loss": 0.6700847148895264, "step": 14987 }, { "epoch": 18.390184049079753, "grad_norm": 0.28865793347358704, "learning_rate": 8.635852446713694e-07, "loss": 0.5336388349533081, "step": 14988 }, { "epoch": 18.391411042944785, "grad_norm": 0.2895233631134033, "learning_rate": 8.622779189453007e-07, "loss": 0.7537575960159302, "step": 14989 }, { "epoch": 18.392638036809817, "grad_norm": 0.250654935836792, "learning_rate": 8.609715661419687e-07, "loss": 0.37646153569221497, "step": 14990 }, { "epoch": 18.393865030674846, "grad_norm": 0.2651398777961731, "learning_rate": 8.59666186314026e-07, "loss": 0.517984926700592, "step": 14991 }, { "epoch": 18.395092024539878, "grad_norm": 0.27186036109924316, "learning_rate": 8.583617795140859e-07, "loss": 0.5918647050857544, "step": 14992 }, { "epoch": 18.39631901840491, "grad_norm": 0.27151286602020264, "learning_rate": 8.570583457947285e-07, "loss": 0.6331974267959595, "step": 14993 }, { "epoch": 18.397546012269938, "grad_norm": 0.27655377984046936, "learning_rate": 8.557558852084924e-07, "loss": 0.6269717812538147, "step": 14994 }, { "epoch": 18.39877300613497, "grad_norm": 0.2710462212562561, "learning_rate": 8.544543978078773e-07, "loss": 0.6422926187515259, "step": 14995 }, { "epoch": 18.4, "grad_norm": 0.3061816692352295, "learning_rate": 8.531538836453357e-07, "loss": 0.7262202501296997, "step": 14996 }, { "epoch": 18.40122699386503, "grad_norm": 0.26564717292785645, "learning_rate": 8.51854342773295e-07, "loss": 0.800443172454834, "step": 14997 }, { "epoch": 18.402453987730063, "grad_norm": 0.27169930934906006, "learning_rate": 8.505557752441301e-07, "loss": 0.5864543318748474, "step": 14998 }, { "epoch": 18.40368098159509, "grad_norm": 0.26096153259277344, "learning_rate": 8.49258181110188e-07, "loss": 0.4188769459724426, "step": 14999 }, { "epoch": 18.404907975460123, "grad_norm": 0.2749757766723633, "learning_rate": 8.479615604237712e-07, "loss": 0.7002823352813721, "step": 15000 }, { "epoch": 18.406134969325155, "grad_norm": 0.26988518238067627, "learning_rate": 8.466659132371324e-07, "loss": 0.5017666220664978, "step": 15001 }, { "epoch": 18.407361963190183, "grad_norm": 0.32096439599990845, "learning_rate": 8.453712396025076e-07, "loss": 0.7862387895584106, "step": 15002 }, { "epoch": 18.408588957055215, "grad_norm": 0.2584100067615509, "learning_rate": 8.440775395720773e-07, "loss": 0.5598587393760681, "step": 15003 }, { "epoch": 18.409815950920244, "grad_norm": 0.27189522981643677, "learning_rate": 8.427848131979887e-07, "loss": 0.5677369236946106, "step": 15004 }, { "epoch": 18.411042944785276, "grad_norm": 0.28630581498146057, "learning_rate": 8.414930605323446e-07, "loss": 0.7373979091644287, "step": 15005 }, { "epoch": 18.412269938650308, "grad_norm": 0.25525593757629395, "learning_rate": 8.402022816272115e-07, "loss": 0.6582209467887878, "step": 15006 }, { "epoch": 18.413496932515336, "grad_norm": 0.24422357976436615, "learning_rate": 8.389124765346173e-07, "loss": 0.5487877130508423, "step": 15007 }, { "epoch": 18.41472392638037, "grad_norm": 0.2570844888687134, "learning_rate": 8.376236453065539e-07, "loss": 0.6310504078865051, "step": 15008 }, { "epoch": 18.4159509202454, "grad_norm": 0.283956378698349, "learning_rate": 8.363357879949685e-07, "loss": 0.47886431217193604, "step": 15009 }, { "epoch": 18.41717791411043, "grad_norm": 0.2573699653148651, "learning_rate": 8.350489046517695e-07, "loss": 0.6702180504798889, "step": 15010 }, { "epoch": 18.41840490797546, "grad_norm": 0.2506057024002075, "learning_rate": 8.337629953288295e-07, "loss": 0.6182423830032349, "step": 15011 }, { "epoch": 18.41963190184049, "grad_norm": 0.2700954079627991, "learning_rate": 8.324780600779764e-07, "loss": 0.8321703672409058, "step": 15012 }, { "epoch": 18.42085889570552, "grad_norm": 0.2533617913722992, "learning_rate": 8.311940989510076e-07, "loss": 0.6778624057769775, "step": 15013 }, { "epoch": 18.422085889570553, "grad_norm": 0.23095104098320007, "learning_rate": 8.299111119996706e-07, "loss": 0.43314534425735474, "step": 15014 }, { "epoch": 18.42331288343558, "grad_norm": 0.2955207824707031, "learning_rate": 8.286290992756851e-07, "loss": 0.7053745985031128, "step": 15015 }, { "epoch": 18.424539877300614, "grad_norm": 0.30995631217956543, "learning_rate": 8.273480608307182e-07, "loss": 0.736630916595459, "step": 15016 }, { "epoch": 18.425766871165646, "grad_norm": 0.26751789450645447, "learning_rate": 8.260679967164092e-07, "loss": 0.6557402610778809, "step": 15017 }, { "epoch": 18.426993865030674, "grad_norm": 0.2378382384777069, "learning_rate": 8.247889069843529e-07, "loss": 0.5029628872871399, "step": 15018 }, { "epoch": 18.428220858895706, "grad_norm": 0.2665591537952423, "learning_rate": 8.235107916861052e-07, "loss": 0.5210702419281006, "step": 15019 }, { "epoch": 18.429447852760735, "grad_norm": 0.25683659315109253, "learning_rate": 8.222336508731887e-07, "loss": 0.6265289187431335, "step": 15020 }, { "epoch": 18.430674846625767, "grad_norm": 0.2376331090927124, "learning_rate": 8.209574845970735e-07, "loss": 0.4027339816093445, "step": 15021 }, { "epoch": 18.4319018404908, "grad_norm": 0.2511342763900757, "learning_rate": 8.19682292909199e-07, "loss": 0.5003116130828857, "step": 15022 }, { "epoch": 18.433128834355827, "grad_norm": 0.262071430683136, "learning_rate": 8.184080758609658e-07, "loss": 0.5126713514328003, "step": 15023 }, { "epoch": 18.43435582822086, "grad_norm": 0.26501163840293884, "learning_rate": 8.17134833503741e-07, "loss": 0.4675607681274414, "step": 15024 }, { "epoch": 18.43558282208589, "grad_norm": 0.29066699743270874, "learning_rate": 8.158625658888335e-07, "loss": 0.5905107259750366, "step": 15025 }, { "epoch": 18.43680981595092, "grad_norm": 0.2355525642633438, "learning_rate": 8.145912730675331e-07, "loss": 0.3430643081665039, "step": 15026 }, { "epoch": 18.43803680981595, "grad_norm": 0.274815171957016, "learning_rate": 8.13320955091082e-07, "loss": 0.633973240852356, "step": 15027 }, { "epoch": 18.43926380368098, "grad_norm": 0.3103507459163666, "learning_rate": 8.120516120106753e-07, "loss": 0.6810316443443298, "step": 15028 }, { "epoch": 18.440490797546012, "grad_norm": 0.25461915135383606, "learning_rate": 8.107832438774887e-07, "loss": 0.4680299460887909, "step": 15029 }, { "epoch": 18.441717791411044, "grad_norm": 0.296601802110672, "learning_rate": 8.09515850742637e-07, "loss": 0.5009694695472717, "step": 15030 }, { "epoch": 18.442944785276072, "grad_norm": 0.24857783317565918, "learning_rate": 8.082494326572043e-07, "loss": 0.4273577928543091, "step": 15031 }, { "epoch": 18.444171779141104, "grad_norm": 0.23995882272720337, "learning_rate": 8.069839896722442e-07, "loss": 0.3288445770740509, "step": 15032 }, { "epoch": 18.445398773006136, "grad_norm": 0.24957355856895447, "learning_rate": 8.057195218387547e-07, "loss": 0.5318127870559692, "step": 15033 }, { "epoch": 18.446625766871165, "grad_norm": 0.26561078429222107, "learning_rate": 8.044560292077146e-07, "loss": 0.6572602987289429, "step": 15034 }, { "epoch": 18.447852760736197, "grad_norm": 0.24103668332099915, "learning_rate": 8.031935118300388e-07, "loss": 0.5004051923751831, "step": 15035 }, { "epoch": 18.449079754601225, "grad_norm": 0.316873162984848, "learning_rate": 8.019319697566196e-07, "loss": 0.7156195044517517, "step": 15036 }, { "epoch": 18.450306748466257, "grad_norm": 0.25244203209877014, "learning_rate": 8.006714030383084e-07, "loss": 0.6055111885070801, "step": 15037 }, { "epoch": 18.45153374233129, "grad_norm": 0.29162099957466125, "learning_rate": 7.99411811725917e-07, "loss": 0.6415433883666992, "step": 15038 }, { "epoch": 18.452760736196318, "grad_norm": 0.23962543904781342, "learning_rate": 7.981531958702131e-07, "loss": 0.5539005994796753, "step": 15039 }, { "epoch": 18.45398773006135, "grad_norm": 0.24174335598945618, "learning_rate": 7.968955555219259e-07, "loss": 0.6746041774749756, "step": 15040 }, { "epoch": 18.45521472392638, "grad_norm": 0.2767097055912018, "learning_rate": 7.956388907317508e-07, "loss": 0.5615209937095642, "step": 15041 }, { "epoch": 18.45644171779141, "grad_norm": 0.255563884973526, "learning_rate": 7.943832015503361e-07, "loss": 0.6252062320709229, "step": 15042 }, { "epoch": 18.457668711656442, "grad_norm": 0.25251320004463196, "learning_rate": 7.931284880282997e-07, "loss": 0.6074473261833191, "step": 15043 }, { "epoch": 18.45889570552147, "grad_norm": 0.2742644250392914, "learning_rate": 7.918747502162177e-07, "loss": 0.6066598892211914, "step": 15044 }, { "epoch": 18.460122699386503, "grad_norm": 0.30004239082336426, "learning_rate": 7.906219881646165e-07, "loss": 0.6686370968818665, "step": 15045 }, { "epoch": 18.461349693251535, "grad_norm": 0.29838523268699646, "learning_rate": 7.893702019239946e-07, "loss": 0.39523789286613464, "step": 15046 }, { "epoch": 18.462576687116563, "grad_norm": 0.2740970253944397, "learning_rate": 7.881193915448087e-07, "loss": 0.5338302850723267, "step": 15047 }, { "epoch": 18.463803680981595, "grad_norm": 0.2426895797252655, "learning_rate": 7.868695570774797e-07, "loss": 0.42754417657852173, "step": 15048 }, { "epoch": 18.465030674846627, "grad_norm": 0.28807586431503296, "learning_rate": 7.856206985723786e-07, "loss": 0.6008538603782654, "step": 15049 }, { "epoch": 18.466257668711656, "grad_norm": 0.21640616655349731, "learning_rate": 7.84372816079848e-07, "loss": 0.28484609723091125, "step": 15050 }, { "epoch": 18.467484662576688, "grad_norm": 0.27961060404777527, "learning_rate": 7.831259096501814e-07, "loss": 0.43502914905548096, "step": 15051 }, { "epoch": 18.46871165644172, "grad_norm": 0.25631678104400635, "learning_rate": 7.818799793336412e-07, "loss": 0.6460347175598145, "step": 15052 }, { "epoch": 18.469938650306748, "grad_norm": 0.3183768689632416, "learning_rate": 7.806350251804484e-07, "loss": 0.7350654602050781, "step": 15053 }, { "epoch": 18.47116564417178, "grad_norm": 0.30789369344711304, "learning_rate": 7.793910472407795e-07, "loss": 0.4440527856349945, "step": 15054 }, { "epoch": 18.47239263803681, "grad_norm": 0.2752784490585327, "learning_rate": 7.781480455647778e-07, "loss": 0.553666353225708, "step": 15055 }, { "epoch": 18.47361963190184, "grad_norm": 0.2633611559867859, "learning_rate": 7.769060202025474e-07, "loss": 0.7231197357177734, "step": 15056 }, { "epoch": 18.474846625766872, "grad_norm": 0.2986191213130951, "learning_rate": 7.756649712041486e-07, "loss": 0.6437402963638306, "step": 15057 }, { "epoch": 18.4760736196319, "grad_norm": 0.2688346207141876, "learning_rate": 7.744248986196051e-07, "loss": 0.46620458364486694, "step": 15058 }, { "epoch": 18.477300613496933, "grad_norm": 0.25008657574653625, "learning_rate": 7.731858024989019e-07, "loss": 0.6280673742294312, "step": 15059 }, { "epoch": 18.478527607361965, "grad_norm": 0.24196794629096985, "learning_rate": 7.71947682891977e-07, "loss": 0.5630426406860352, "step": 15060 }, { "epoch": 18.479754601226993, "grad_norm": 0.36446908116340637, "learning_rate": 7.70710539848743e-07, "loss": 0.5952461957931519, "step": 15061 }, { "epoch": 18.480981595092025, "grad_norm": 0.267814964056015, "learning_rate": 7.694743734190657e-07, "loss": 0.4886084198951721, "step": 15062 }, { "epoch": 18.482208588957054, "grad_norm": 0.246405690908432, "learning_rate": 7.682391836527664e-07, "loss": 0.5635120868682861, "step": 15063 }, { "epoch": 18.483435582822086, "grad_norm": 0.24214372038841248, "learning_rate": 7.670049705996357e-07, "loss": 0.5162582397460938, "step": 15064 }, { "epoch": 18.484662576687118, "grad_norm": 0.2803967595100403, "learning_rate": 7.657717343094173e-07, "loss": 0.7461479902267456, "step": 15065 }, { "epoch": 18.485889570552146, "grad_norm": 0.2980840802192688, "learning_rate": 7.645394748318241e-07, "loss": 0.6661889553070068, "step": 15066 }, { "epoch": 18.487116564417178, "grad_norm": 0.2393178939819336, "learning_rate": 7.63308192216522e-07, "loss": 0.6595220565795898, "step": 15067 }, { "epoch": 18.48834355828221, "grad_norm": 0.25572678446769714, "learning_rate": 7.620778865131406e-07, "loss": 0.48996564745903015, "step": 15068 }, { "epoch": 18.48957055214724, "grad_norm": 0.2874976694583893, "learning_rate": 7.608485577712737e-07, "loss": 0.6075916290283203, "step": 15069 }, { "epoch": 18.49079754601227, "grad_norm": 0.24423320591449738, "learning_rate": 7.596202060404678e-07, "loss": 0.5831315517425537, "step": 15070 }, { "epoch": 18.4920245398773, "grad_norm": 0.24751967191696167, "learning_rate": 7.583928313702332e-07, "loss": 0.6452398300170898, "step": 15071 }, { "epoch": 18.49325153374233, "grad_norm": 0.2806456685066223, "learning_rate": 7.571664338100498e-07, "loss": 0.6140261888504028, "step": 15072 }, { "epoch": 18.494478527607363, "grad_norm": 0.27064502239227295, "learning_rate": 7.55941013409342e-07, "loss": 0.7779956459999084, "step": 15073 }, { "epoch": 18.49570552147239, "grad_norm": 0.2319720834493637, "learning_rate": 7.547165702175036e-07, "loss": 0.5071749091148376, "step": 15074 }, { "epoch": 18.496932515337424, "grad_norm": 0.2741059958934784, "learning_rate": 7.534931042838922e-07, "loss": 0.5579279661178589, "step": 15075 }, { "epoch": 18.498159509202456, "grad_norm": 0.24883468449115753, "learning_rate": 7.522706156578214e-07, "loss": 0.7345324754714966, "step": 15076 }, { "epoch": 18.499386503067484, "grad_norm": 0.25426188111305237, "learning_rate": 7.510491043885681e-07, "loss": 0.5537772178649902, "step": 15077 }, { "epoch": 18.500613496932516, "grad_norm": 0.27426281571388245, "learning_rate": 7.498285705253599e-07, "loss": 0.8123739957809448, "step": 15078 }, { "epoch": 18.501840490797544, "grad_norm": 0.28615546226501465, "learning_rate": 7.48609014117399e-07, "loss": 0.5964265465736389, "step": 15079 }, { "epoch": 18.503067484662576, "grad_norm": 0.30579593777656555, "learning_rate": 7.473904352138434e-07, "loss": 0.7176042795181274, "step": 15080 }, { "epoch": 18.50429447852761, "grad_norm": 0.255885511636734, "learning_rate": 7.461728338638091e-07, "loss": 0.5266407132148743, "step": 15081 }, { "epoch": 18.505521472392637, "grad_norm": 0.2587753236293793, "learning_rate": 7.449562101163737e-07, "loss": 0.6365657448768616, "step": 15082 }, { "epoch": 18.50674846625767, "grad_norm": 0.24876601994037628, "learning_rate": 7.437405640205757e-07, "loss": 0.5127677917480469, "step": 15083 }, { "epoch": 18.5079754601227, "grad_norm": 0.23445630073547363, "learning_rate": 7.425258956254149e-07, "loss": 0.6411794424057007, "step": 15084 }, { "epoch": 18.50920245398773, "grad_norm": 0.24745701253414154, "learning_rate": 7.413122049798493e-07, "loss": 0.5648179054260254, "step": 15085 }, { "epoch": 18.51042944785276, "grad_norm": 0.25540637969970703, "learning_rate": 7.400994921328009e-07, "loss": 0.6375343799591064, "step": 15086 }, { "epoch": 18.51165644171779, "grad_norm": 0.2614000737667084, "learning_rate": 7.388877571331526e-07, "loss": 0.75091552734375, "step": 15087 }, { "epoch": 18.512883435582822, "grad_norm": 0.2700320780277252, "learning_rate": 7.376770000297434e-07, "loss": 0.6141526699066162, "step": 15088 }, { "epoch": 18.514110429447854, "grad_norm": 0.2803581953048706, "learning_rate": 7.36467220871373e-07, "loss": 0.6992532014846802, "step": 15089 }, { "epoch": 18.515337423312882, "grad_norm": 0.4579346179962158, "learning_rate": 7.352584197068052e-07, "loss": 0.635081946849823, "step": 15090 }, { "epoch": 18.516564417177914, "grad_norm": 0.23495975136756897, "learning_rate": 7.340505965847733e-07, "loss": 0.5005642175674438, "step": 15091 }, { "epoch": 18.517791411042946, "grad_norm": 0.26014724373817444, "learning_rate": 7.328437515539494e-07, "loss": 0.4779026508331299, "step": 15092 }, { "epoch": 18.519018404907975, "grad_norm": 0.299617201089859, "learning_rate": 7.316378846629806e-07, "loss": 0.5042616128921509, "step": 15093 }, { "epoch": 18.520245398773007, "grad_norm": 0.26069051027297974, "learning_rate": 7.304329959604728e-07, "loss": 0.4813694357872009, "step": 15094 }, { "epoch": 18.521472392638035, "grad_norm": 0.28472867608070374, "learning_rate": 7.292290854949924e-07, "loss": 0.6566644906997681, "step": 15095 }, { "epoch": 18.522699386503067, "grad_norm": 0.27900993824005127, "learning_rate": 7.280261533150701e-07, "loss": 0.5997101068496704, "step": 15096 }, { "epoch": 18.5239263803681, "grad_norm": 0.27072933316230774, "learning_rate": 7.268241994691838e-07, "loss": 0.4074103534221649, "step": 15097 }, { "epoch": 18.525153374233128, "grad_norm": 0.2461530566215515, "learning_rate": 7.256232240057836e-07, "loss": 0.5919272899627686, "step": 15098 }, { "epoch": 18.52638036809816, "grad_norm": 0.25327226519584656, "learning_rate": 7.244232269732781e-07, "loss": 0.5056906938552856, "step": 15099 }, { "epoch": 18.52760736196319, "grad_norm": 0.24240347743034363, "learning_rate": 7.232242084200369e-07, "loss": 0.5309599041938782, "step": 15100 }, { "epoch": 18.52883435582822, "grad_norm": 0.27513226866722107, "learning_rate": 7.220261683943936e-07, "loss": 0.6863803863525391, "step": 15101 }, { "epoch": 18.530061349693252, "grad_norm": 0.26228705048561096, "learning_rate": 7.20829106944626e-07, "loss": 0.5250183939933777, "step": 15102 }, { "epoch": 18.53128834355828, "grad_norm": 0.2649267911911011, "learning_rate": 7.19633024118993e-07, "loss": 0.5145870447158813, "step": 15103 }, { "epoch": 18.532515337423312, "grad_norm": 0.27462324500083923, "learning_rate": 7.184379199657032e-07, "loss": 0.649298906326294, "step": 15104 }, { "epoch": 18.533742331288344, "grad_norm": 0.265155553817749, "learning_rate": 7.172437945329264e-07, "loss": 0.5217992067337036, "step": 15105 }, { "epoch": 18.534969325153373, "grad_norm": 0.2447088658809662, "learning_rate": 7.16050647868799e-07, "loss": 0.3777480721473694, "step": 15106 }, { "epoch": 18.536196319018405, "grad_norm": 0.2790638506412506, "learning_rate": 7.148584800214048e-07, "loss": 0.566234827041626, "step": 15107 }, { "epoch": 18.537423312883437, "grad_norm": 0.24771304428577423, "learning_rate": 7.136672910388054e-07, "loss": 0.5184342861175537, "step": 15108 }, { "epoch": 18.538650306748465, "grad_norm": 0.29097306728363037, "learning_rate": 7.124770809690095e-07, "loss": 0.44570547342300415, "step": 15109 }, { "epoch": 18.539877300613497, "grad_norm": 0.2520453631877899, "learning_rate": 7.112878498599929e-07, "loss": 0.5955885648727417, "step": 15110 }, { "epoch": 18.54110429447853, "grad_norm": 0.2866119146347046, "learning_rate": 7.100995977596919e-07, "loss": 0.6235331296920776, "step": 15111 }, { "epoch": 18.542331288343558, "grad_norm": 0.2389226108789444, "learning_rate": 7.089123247159935e-07, "loss": 0.22559209167957306, "step": 15112 }, { "epoch": 18.54355828220859, "grad_norm": 0.253071129322052, "learning_rate": 7.077260307767592e-07, "loss": 0.6456031799316406, "step": 15113 }, { "epoch": 18.54478527607362, "grad_norm": 0.25669771432876587, "learning_rate": 7.065407159898091e-07, "loss": 0.5606966018676758, "step": 15114 }, { "epoch": 18.54601226993865, "grad_norm": 0.28720763325691223, "learning_rate": 7.053563804029162e-07, "loss": 0.6883248090744019, "step": 15115 }, { "epoch": 18.547239263803682, "grad_norm": 0.3068024218082428, "learning_rate": 7.041730240638145e-07, "loss": 0.6839640140533447, "step": 15116 }, { "epoch": 18.54846625766871, "grad_norm": 0.24132217466831207, "learning_rate": 7.029906470202046e-07, "loss": 0.49997156858444214, "step": 15117 }, { "epoch": 18.549693251533743, "grad_norm": 0.3131883442401886, "learning_rate": 7.018092493197432e-07, "loss": 0.5522582530975342, "step": 15118 }, { "epoch": 18.550920245398775, "grad_norm": 0.24126362800598145, "learning_rate": 7.006288310100528e-07, "loss": 0.5521384477615356, "step": 15119 }, { "epoch": 18.552147239263803, "grad_norm": 0.28721854090690613, "learning_rate": 6.994493921387096e-07, "loss": 0.5958684682846069, "step": 15120 }, { "epoch": 18.553374233128835, "grad_norm": 0.30204153060913086, "learning_rate": 6.982709327532561e-07, "loss": 0.7713477611541748, "step": 15121 }, { "epoch": 18.554601226993864, "grad_norm": 0.2600513696670532, "learning_rate": 6.970934529011874e-07, "loss": 0.48867762088775635, "step": 15122 }, { "epoch": 18.555828220858896, "grad_norm": 0.27114149928092957, "learning_rate": 6.959169526299686e-07, "loss": 0.7132159471511841, "step": 15123 }, { "epoch": 18.557055214723928, "grad_norm": 0.26623812317848206, "learning_rate": 6.947414319870171e-07, "loss": 0.5764646530151367, "step": 15124 }, { "epoch": 18.558282208588956, "grad_norm": 0.28244468569755554, "learning_rate": 6.935668910197202e-07, "loss": 0.7551293969154358, "step": 15125 }, { "epoch": 18.559509202453988, "grad_norm": 0.2580442726612091, "learning_rate": 6.923933297754204e-07, "loss": 0.5424528121948242, "step": 15126 }, { "epoch": 18.56073619631902, "grad_norm": 0.24951207637786865, "learning_rate": 6.912207483014161e-07, "loss": 0.6466777920722961, "step": 15127 }, { "epoch": 18.56196319018405, "grad_norm": 0.26366811990737915, "learning_rate": 6.900491466449721e-07, "loss": 0.5556007623672485, "step": 15128 }, { "epoch": 18.56319018404908, "grad_norm": 0.26368480920791626, "learning_rate": 6.888785248533119e-07, "loss": 0.6620758771896362, "step": 15129 }, { "epoch": 18.56441717791411, "grad_norm": 0.30060070753097534, "learning_rate": 6.877088829736228e-07, "loss": 0.7801352739334106, "step": 15130 }, { "epoch": 18.56564417177914, "grad_norm": 0.25798070430755615, "learning_rate": 6.865402210530503e-07, "loss": 0.626514732837677, "step": 15131 }, { "epoch": 18.566871165644173, "grad_norm": 0.30103838443756104, "learning_rate": 6.853725391386928e-07, "loss": 0.6700391173362732, "step": 15132 }, { "epoch": 18.5680981595092, "grad_norm": 0.2740756571292877, "learning_rate": 6.842058372776211e-07, "loss": 0.7246178984642029, "step": 15133 }, { "epoch": 18.569325153374233, "grad_norm": 0.27037662267684937, "learning_rate": 6.830401155168614e-07, "loss": 0.6969963312149048, "step": 15134 }, { "epoch": 18.570552147239265, "grad_norm": 0.2706049978733063, "learning_rate": 6.818753739034012e-07, "loss": 0.6336203813552856, "step": 15135 }, { "epoch": 18.571779141104294, "grad_norm": 0.23064523935317993, "learning_rate": 6.807116124841861e-07, "loss": 0.5598507523536682, "step": 15136 }, { "epoch": 18.573006134969326, "grad_norm": 0.26371335983276367, "learning_rate": 6.79548831306126e-07, "loss": 0.5162768363952637, "step": 15137 }, { "epoch": 18.574233128834354, "grad_norm": 0.28740817308425903, "learning_rate": 6.783870304160888e-07, "loss": 0.700891375541687, "step": 15138 }, { "epoch": 18.575460122699386, "grad_norm": 0.25728079676628113, "learning_rate": 6.772262098609039e-07, "loss": 0.519813060760498, "step": 15139 }, { "epoch": 18.57668711656442, "grad_norm": 0.2500128448009491, "learning_rate": 6.760663696873587e-07, "loss": 0.7611314058303833, "step": 15140 }, { "epoch": 18.577914110429447, "grad_norm": 0.20538310706615448, "learning_rate": 6.749075099422047e-07, "loss": 0.3309552073478699, "step": 15141 }, { "epoch": 18.57914110429448, "grad_norm": 0.27152785658836365, "learning_rate": 6.737496306721519e-07, "loss": 0.6586913466453552, "step": 15142 }, { "epoch": 18.58036809815951, "grad_norm": 0.2581484615802765, "learning_rate": 6.725927319238684e-07, "loss": 0.6152893304824829, "step": 15143 }, { "epoch": 18.58159509202454, "grad_norm": 0.2583959698677063, "learning_rate": 6.714368137439891e-07, "loss": 0.5489472150802612, "step": 15144 }, { "epoch": 18.58282208588957, "grad_norm": 0.2896976172924042, "learning_rate": 6.702818761791074e-07, "loss": 0.5843064188957214, "step": 15145 }, { "epoch": 18.5840490797546, "grad_norm": 0.32915428280830383, "learning_rate": 6.691279192757694e-07, "loss": 0.6426497101783752, "step": 15146 }, { "epoch": 18.58527607361963, "grad_norm": 0.27131614089012146, "learning_rate": 6.679749430804933e-07, "loss": 0.6845475435256958, "step": 15147 }, { "epoch": 18.586503067484664, "grad_norm": 0.27222830057144165, "learning_rate": 6.668229476397475e-07, "loss": 0.8038276433944702, "step": 15148 }, { "epoch": 18.587730061349692, "grad_norm": 0.2544812262058258, "learning_rate": 6.6567193299997e-07, "loss": 0.6797153353691101, "step": 15149 }, { "epoch": 18.588957055214724, "grad_norm": 0.2733532786369324, "learning_rate": 6.645218992075569e-07, "loss": 0.6351924538612366, "step": 15150 }, { "epoch": 18.590184049079756, "grad_norm": 0.27875566482543945, "learning_rate": 6.633728463088545e-07, "loss": 0.6879254579544067, "step": 15151 }, { "epoch": 18.591411042944785, "grad_norm": 0.25787198543548584, "learning_rate": 6.622247743501814e-07, "loss": 0.5680431723594666, "step": 15152 }, { "epoch": 18.592638036809817, "grad_norm": 0.2976534962654114, "learning_rate": 6.610776833778171e-07, "loss": 0.6325063705444336, "step": 15153 }, { "epoch": 18.593865030674845, "grad_norm": 0.30335891246795654, "learning_rate": 6.599315734379913e-07, "loss": 0.6572613716125488, "step": 15154 }, { "epoch": 18.595092024539877, "grad_norm": 0.260000616312027, "learning_rate": 6.587864445769087e-07, "loss": 0.4684571921825409, "step": 15155 }, { "epoch": 18.59631901840491, "grad_norm": 0.2805623710155487, "learning_rate": 6.576422968407186e-07, "loss": 0.6678920984268188, "step": 15156 }, { "epoch": 18.597546012269937, "grad_norm": 0.3032101094722748, "learning_rate": 6.564991302755369e-07, "loss": 0.7595306634902954, "step": 15157 }, { "epoch": 18.59877300613497, "grad_norm": 0.2664569914340973, "learning_rate": 6.553569449274488e-07, "loss": 0.5015713572502136, "step": 15158 }, { "epoch": 18.6, "grad_norm": 0.2755366265773773, "learning_rate": 6.542157408424926e-07, "loss": 0.6590249538421631, "step": 15159 }, { "epoch": 18.60122699386503, "grad_norm": 0.265776127576828, "learning_rate": 6.530755180666592e-07, "loss": 0.7124239206314087, "step": 15160 }, { "epoch": 18.602453987730062, "grad_norm": 0.24378176033496857, "learning_rate": 6.519362766459119e-07, "loss": 0.5130454897880554, "step": 15161 }, { "epoch": 18.60368098159509, "grad_norm": 0.25190094113349915, "learning_rate": 6.507980166261724e-07, "loss": 0.6136342287063599, "step": 15162 }, { "epoch": 18.604907975460122, "grad_norm": 0.2689828872680664, "learning_rate": 6.496607380533176e-07, "loss": 0.5782923698425293, "step": 15163 }, { "epoch": 18.606134969325154, "grad_norm": 0.29148581624031067, "learning_rate": 6.485244409731917e-07, "loss": 0.8735064268112183, "step": 15164 }, { "epoch": 18.607361963190183, "grad_norm": 0.2673431932926178, "learning_rate": 6.473891254315911e-07, "loss": 0.6103352308273315, "step": 15165 }, { "epoch": 18.608588957055215, "grad_norm": 0.24434660375118256, "learning_rate": 6.462547914742794e-07, "loss": 0.5326496362686157, "step": 15166 }, { "epoch": 18.609815950920247, "grad_norm": 0.26905757188796997, "learning_rate": 6.451214391469756e-07, "loss": 0.6075183153152466, "step": 15167 }, { "epoch": 18.611042944785275, "grad_norm": 0.27528470754623413, "learning_rate": 6.439890684953681e-07, "loss": 0.6315281987190247, "step": 15168 }, { "epoch": 18.612269938650307, "grad_norm": 0.26316067576408386, "learning_rate": 6.428576795650953e-07, "loss": 0.5694060921669006, "step": 15169 }, { "epoch": 18.61349693251534, "grad_norm": 0.27509331703186035, "learning_rate": 6.417272724017598e-07, "loss": 0.5225584506988525, "step": 15170 }, { "epoch": 18.614723926380368, "grad_norm": 0.2137763351202011, "learning_rate": 6.40597847050925e-07, "loss": 0.5263010859489441, "step": 15171 }, { "epoch": 18.6159509202454, "grad_norm": 0.28555798530578613, "learning_rate": 6.394694035581156e-07, "loss": 0.5906305909156799, "step": 15172 }, { "epoch": 18.617177914110428, "grad_norm": 0.27267321944236755, "learning_rate": 6.383419419688147e-07, "loss": 0.7411438226699829, "step": 15173 }, { "epoch": 18.61840490797546, "grad_norm": 0.28868696093559265, "learning_rate": 6.372154623284748e-07, "loss": 0.4843969941139221, "step": 15174 }, { "epoch": 18.619631901840492, "grad_norm": 0.25439172983169556, "learning_rate": 6.360899646824903e-07, "loss": 0.5076420307159424, "step": 15175 }, { "epoch": 18.62085889570552, "grad_norm": 0.25827136635780334, "learning_rate": 6.349654490762302e-07, "loss": 0.5669487118721008, "step": 15176 }, { "epoch": 18.622085889570553, "grad_norm": 0.2632838785648346, "learning_rate": 6.338419155550223e-07, "loss": 0.5522025227546692, "step": 15177 }, { "epoch": 18.62331288343558, "grad_norm": 0.33273184299468994, "learning_rate": 6.327193641641527e-07, "loss": 0.8938724994659424, "step": 15178 }, { "epoch": 18.624539877300613, "grad_norm": 0.2522442042827606, "learning_rate": 6.315977949488683e-07, "loss": 0.5643070936203003, "step": 15179 }, { "epoch": 18.625766871165645, "grad_norm": 0.296111136674881, "learning_rate": 6.304772079543747e-07, "loss": 0.6548358201980591, "step": 15180 }, { "epoch": 18.626993865030673, "grad_norm": 0.2684704065322876, "learning_rate": 6.293576032258413e-07, "loss": 0.592420756816864, "step": 15181 }, { "epoch": 18.628220858895705, "grad_norm": 0.3072897493839264, "learning_rate": 6.282389808083961e-07, "loss": 0.743360161781311, "step": 15182 }, { "epoch": 18.629447852760737, "grad_norm": 0.26359885931015015, "learning_rate": 6.271213407471305e-07, "loss": 0.7179452180862427, "step": 15183 }, { "epoch": 18.630674846625766, "grad_norm": 0.25691908597946167, "learning_rate": 6.260046830870864e-07, "loss": 0.6879527568817139, "step": 15184 }, { "epoch": 18.631901840490798, "grad_norm": 0.2890221178531647, "learning_rate": 6.24889007873275e-07, "loss": 0.6400716304779053, "step": 15185 }, { "epoch": 18.63312883435583, "grad_norm": 0.2790442109107971, "learning_rate": 6.237743151506715e-07, "loss": 0.4744044542312622, "step": 15186 }, { "epoch": 18.63435582822086, "grad_norm": 0.27908948063850403, "learning_rate": 6.22660604964198e-07, "loss": 0.6723270416259766, "step": 15187 }, { "epoch": 18.63558282208589, "grad_norm": 0.2986932396888733, "learning_rate": 6.215478773587552e-07, "loss": 0.771935224533081, "step": 15188 }, { "epoch": 18.63680981595092, "grad_norm": 0.27121642231941223, "learning_rate": 6.204361323791846e-07, "loss": 0.528566837310791, "step": 15189 }, { "epoch": 18.63803680981595, "grad_norm": 0.25893843173980713, "learning_rate": 6.193253700703005e-07, "loss": 0.6105513572692871, "step": 15190 }, { "epoch": 18.639263803680983, "grad_norm": 0.23307769000530243, "learning_rate": 6.182155904768727e-07, "loss": 0.47514796257019043, "step": 15191 }, { "epoch": 18.64049079754601, "grad_norm": 0.2811766564846039, "learning_rate": 6.171067936436375e-07, "loss": 0.48511427640914917, "step": 15192 }, { "epoch": 18.641717791411043, "grad_norm": 0.22637492418289185, "learning_rate": 6.159989796152898e-07, "loss": 0.43498456478118896, "step": 15193 }, { "epoch": 18.642944785276075, "grad_norm": 0.2596653401851654, "learning_rate": 6.148921484364717e-07, "loss": 0.5635366439819336, "step": 15194 }, { "epoch": 18.644171779141104, "grad_norm": 0.30054521560668945, "learning_rate": 6.137863001518057e-07, "loss": 0.5813276767730713, "step": 15195 }, { "epoch": 18.645398773006136, "grad_norm": 0.2696506083011627, "learning_rate": 6.126814348058591e-07, "loss": 0.6740450859069824, "step": 15196 }, { "epoch": 18.646625766871164, "grad_norm": 0.24237844347953796, "learning_rate": 6.11577552443171e-07, "loss": 0.4849277138710022, "step": 15197 }, { "epoch": 18.647852760736196, "grad_norm": 0.27191784977912903, "learning_rate": 6.104746531082367e-07, "loss": 0.6088966131210327, "step": 15198 }, { "epoch": 18.649079754601228, "grad_norm": 0.24983380734920502, "learning_rate": 6.093727368455038e-07, "loss": 0.46161937713623047, "step": 15199 }, { "epoch": 18.650306748466257, "grad_norm": 0.28195759654045105, "learning_rate": 6.082718036993923e-07, "loss": 0.6653953790664673, "step": 15200 }, { "epoch": 18.65153374233129, "grad_norm": 0.2479625642299652, "learning_rate": 6.071718537142751e-07, "loss": 0.5292989611625671, "step": 15201 }, { "epoch": 18.65276073619632, "grad_norm": 0.24449020624160767, "learning_rate": 6.060728869344945e-07, "loss": 0.41676780581474304, "step": 15202 }, { "epoch": 18.65398773006135, "grad_norm": 0.2653891444206238, "learning_rate": 6.0497490340434e-07, "loss": 0.5717869997024536, "step": 15203 }, { "epoch": 18.65521472392638, "grad_norm": 0.2833523452281952, "learning_rate": 6.038779031680708e-07, "loss": 0.6582491397857666, "step": 15204 }, { "epoch": 18.65644171779141, "grad_norm": 0.24876902997493744, "learning_rate": 6.027818862699013e-07, "loss": 0.5271468162536621, "step": 15205 }, { "epoch": 18.65766871165644, "grad_norm": 0.2767656147480011, "learning_rate": 6.016868527540131e-07, "loss": 0.6003050804138184, "step": 15206 }, { "epoch": 18.658895705521473, "grad_norm": 0.29805460572242737, "learning_rate": 6.005928026645429e-07, "loss": 0.7094470262527466, "step": 15207 }, { "epoch": 18.660122699386502, "grad_norm": 0.3125782310962677, "learning_rate": 5.994997360455862e-07, "loss": 0.6571345329284668, "step": 15208 }, { "epoch": 18.661349693251534, "grad_norm": 0.26030030846595764, "learning_rate": 5.984076529412019e-07, "loss": 0.6451325416564941, "step": 15209 }, { "epoch": 18.662576687116566, "grad_norm": 0.27238890528678894, "learning_rate": 5.973165533954106e-07, "loss": 0.4976768493652344, "step": 15210 }, { "epoch": 18.663803680981594, "grad_norm": 0.25750961899757385, "learning_rate": 5.962264374521908e-07, "loss": 0.5418727397918701, "step": 15211 }, { "epoch": 18.665030674846626, "grad_norm": 0.24363575875759125, "learning_rate": 5.951373051554826e-07, "loss": 0.4258368909358978, "step": 15212 }, { "epoch": 18.666257668711655, "grad_norm": 0.25335267186164856, "learning_rate": 5.940491565491813e-07, "loss": 0.6272305846214294, "step": 15213 }, { "epoch": 18.667484662576687, "grad_norm": 0.2740723192691803, "learning_rate": 5.929619916771518e-07, "loss": 0.5700002908706665, "step": 15214 }, { "epoch": 18.66871165644172, "grad_norm": 0.27243390679359436, "learning_rate": 5.918758105832145e-07, "loss": 0.853178083896637, "step": 15215 }, { "epoch": 18.669938650306747, "grad_norm": 0.28763407468795776, "learning_rate": 5.907906133111485e-07, "loss": 0.6041160225868225, "step": 15216 }, { "epoch": 18.67116564417178, "grad_norm": 0.2628782093524933, "learning_rate": 5.897063999046965e-07, "loss": 0.5168743133544922, "step": 15217 }, { "epoch": 18.67239263803681, "grad_norm": 0.3157549202442169, "learning_rate": 5.886231704075596e-07, "loss": 0.7035725116729736, "step": 15218 }, { "epoch": 18.67361963190184, "grad_norm": 0.3202553689479828, "learning_rate": 5.875409248633973e-07, "loss": 0.536139965057373, "step": 15219 }, { "epoch": 18.67484662576687, "grad_norm": 0.24115949869155884, "learning_rate": 5.864596633158331e-07, "loss": 0.4880269765853882, "step": 15220 }, { "epoch": 18.6760736196319, "grad_norm": 0.26897159218788147, "learning_rate": 5.853793858084517e-07, "loss": 0.7130246162414551, "step": 15221 }, { "epoch": 18.677300613496932, "grad_norm": 0.26036468148231506, "learning_rate": 5.84300092384793e-07, "loss": 0.5350053906440735, "step": 15222 }, { "epoch": 18.678527607361964, "grad_norm": 0.26878878474235535, "learning_rate": 5.832217830883641e-07, "loss": 0.6075853705406189, "step": 15223 }, { "epoch": 18.679754601226993, "grad_norm": 0.2722039222717285, "learning_rate": 5.821444579626245e-07, "loss": 0.6574867963790894, "step": 15224 }, { "epoch": 18.680981595092025, "grad_norm": 0.2535555362701416, "learning_rate": 5.810681170510007e-07, "loss": 0.5557583570480347, "step": 15225 }, { "epoch": 18.682208588957057, "grad_norm": 0.26748672127723694, "learning_rate": 5.799927603968747e-07, "loss": 0.6356940269470215, "step": 15226 }, { "epoch": 18.683435582822085, "grad_norm": 0.26495257019996643, "learning_rate": 5.789183880435978e-07, "loss": 0.525108814239502, "step": 15227 }, { "epoch": 18.684662576687117, "grad_norm": 0.272734671831131, "learning_rate": 5.77845000034466e-07, "loss": 0.6729491949081421, "step": 15228 }, { "epoch": 18.68588957055215, "grad_norm": 0.25642308592796326, "learning_rate": 5.767725964127473e-07, "loss": 0.565299391746521, "step": 15229 }, { "epoch": 18.687116564417177, "grad_norm": 0.25593680143356323, "learning_rate": 5.757011772216686e-07, "loss": 0.48387008905410767, "step": 15230 }, { "epoch": 18.68834355828221, "grad_norm": 0.28749197721481323, "learning_rate": 5.746307425044145e-07, "loss": 0.6839739680290222, "step": 15231 }, { "epoch": 18.689570552147238, "grad_norm": 0.2638063132762909, "learning_rate": 5.735612923041339e-07, "loss": 0.4446793496608734, "step": 15232 }, { "epoch": 18.69079754601227, "grad_norm": 0.2121974229812622, "learning_rate": 5.724928266639313e-07, "loss": 0.4256136417388916, "step": 15233 }, { "epoch": 18.692024539877302, "grad_norm": 0.28113579750061035, "learning_rate": 5.714253456268693e-07, "loss": 0.5871365070343018, "step": 15234 }, { "epoch": 18.69325153374233, "grad_norm": 0.27249473333358765, "learning_rate": 5.703588492359829e-07, "loss": 0.47084981203079224, "step": 15235 }, { "epoch": 18.694478527607362, "grad_norm": 0.2878004312515259, "learning_rate": 5.692933375342547e-07, "loss": 0.9171600937843323, "step": 15236 }, { "epoch": 18.69570552147239, "grad_norm": 0.2571480870246887, "learning_rate": 5.682288105646361e-07, "loss": 0.6159586906433105, "step": 15237 }, { "epoch": 18.696932515337423, "grad_norm": 0.26886430382728577, "learning_rate": 5.67165268370029e-07, "loss": 0.6129826903343201, "step": 15238 }, { "epoch": 18.698159509202455, "grad_norm": 0.2444818615913391, "learning_rate": 5.661027109933048e-07, "loss": 0.6010912656784058, "step": 15239 }, { "epoch": 18.699386503067483, "grad_norm": 0.27998071908950806, "learning_rate": 5.650411384772958e-07, "loss": 0.5719574689865112, "step": 15240 }, { "epoch": 18.700613496932515, "grad_norm": 0.27225813269615173, "learning_rate": 5.639805508647844e-07, "loss": 0.7159888744354248, "step": 15241 }, { "epoch": 18.701840490797547, "grad_norm": 0.24633541703224182, "learning_rate": 5.629209481985281e-07, "loss": 0.6096060872077942, "step": 15242 }, { "epoch": 18.703067484662576, "grad_norm": 0.26989373564720154, "learning_rate": 5.618623305212289e-07, "loss": 0.6154076457023621, "step": 15243 }, { "epoch": 18.704294478527608, "grad_norm": 0.25045183300971985, "learning_rate": 5.608046978755582e-07, "loss": 0.549468457698822, "step": 15244 }, { "epoch": 18.70552147239264, "grad_norm": 0.22654187679290771, "learning_rate": 5.597480503041486e-07, "loss": 0.37039828300476074, "step": 15245 }, { "epoch": 18.706748466257668, "grad_norm": 0.2751024067401886, "learning_rate": 5.586923878495881e-07, "loss": 0.38881999254226685, "step": 15246 }, { "epoch": 18.7079754601227, "grad_norm": 0.26486557722091675, "learning_rate": 5.57637710554429e-07, "loss": 0.5700777769088745, "step": 15247 }, { "epoch": 18.70920245398773, "grad_norm": 0.2814318537712097, "learning_rate": 5.565840184611814e-07, "loss": 0.5660899877548218, "step": 15248 }, { "epoch": 18.71042944785276, "grad_norm": 0.22533221542835236, "learning_rate": 5.555313116123174e-07, "loss": 0.4923405945301056, "step": 15249 }, { "epoch": 18.711656441717793, "grad_norm": 0.27861565351486206, "learning_rate": 5.544795900502692e-07, "loss": 0.5831438302993774, "step": 15250 }, { "epoch": 18.71288343558282, "grad_norm": 0.27669093012809753, "learning_rate": 5.53428853817431e-07, "loss": 0.6797917485237122, "step": 15251 }, { "epoch": 18.714110429447853, "grad_norm": 0.27215513586997986, "learning_rate": 5.523791029561492e-07, "loss": 0.6180120706558228, "step": 15252 }, { "epoch": 18.715337423312885, "grad_norm": 0.25468236207962036, "learning_rate": 5.513303375087376e-07, "loss": 0.5339978933334351, "step": 15253 }, { "epoch": 18.716564417177914, "grad_norm": 0.2686900198459625, "learning_rate": 5.502825575174703e-07, "loss": 0.5468940138816833, "step": 15254 }, { "epoch": 18.717791411042946, "grad_norm": 0.2745606005191803, "learning_rate": 5.492357630245831e-07, "loss": 0.46569889783859253, "step": 15255 }, { "epoch": 18.719018404907974, "grad_norm": 0.27219411730766296, "learning_rate": 5.481899540722673e-07, "loss": 0.537952184677124, "step": 15256 }, { "epoch": 18.720245398773006, "grad_norm": 0.2922494113445282, "learning_rate": 5.471451307026726e-07, "loss": 0.3274156451225281, "step": 15257 }, { "epoch": 18.721472392638038, "grad_norm": 0.24552351236343384, "learning_rate": 5.461012929579151e-07, "loss": 0.3906986713409424, "step": 15258 }, { "epoch": 18.722699386503066, "grad_norm": 0.2826331555843353, "learning_rate": 5.450584408800724e-07, "loss": 0.6662216186523438, "step": 15259 }, { "epoch": 18.7239263803681, "grad_norm": 0.2794772982597351, "learning_rate": 5.440165745111747e-07, "loss": 0.37987232208251953, "step": 15260 }, { "epoch": 18.72515337423313, "grad_norm": 0.2881767153739929, "learning_rate": 5.42975693893219e-07, "loss": 0.7102439403533936, "step": 15261 }, { "epoch": 18.72638036809816, "grad_norm": 0.2919233739376068, "learning_rate": 5.419357990681606e-07, "loss": 0.6430931687355042, "step": 15262 }, { "epoch": 18.72760736196319, "grad_norm": 0.30307117104530334, "learning_rate": 5.408968900779104e-07, "loss": 0.6392380595207214, "step": 15263 }, { "epoch": 18.72883435582822, "grad_norm": 0.3010847866535187, "learning_rate": 5.398589669643489e-07, "loss": 0.5133817791938782, "step": 15264 }, { "epoch": 18.73006134969325, "grad_norm": 0.25219497084617615, "learning_rate": 5.388220297693092e-07, "loss": 0.6730844378471375, "step": 15265 }, { "epoch": 18.731288343558283, "grad_norm": 0.48402801156044006, "learning_rate": 5.377860785345911e-07, "loss": 0.6036498546600342, "step": 15266 }, { "epoch": 18.73251533742331, "grad_norm": 0.30074167251586914, "learning_rate": 5.367511133019448e-07, "loss": 0.7011460065841675, "step": 15267 }, { "epoch": 18.733742331288344, "grad_norm": 0.2780938446521759, "learning_rate": 5.357171341130895e-07, "loss": 0.645973801612854, "step": 15268 }, { "epoch": 18.734969325153376, "grad_norm": 0.2802067697048187, "learning_rate": 5.346841410097031e-07, "loss": 0.6336469650268555, "step": 15269 }, { "epoch": 18.736196319018404, "grad_norm": 0.26530593633651733, "learning_rate": 5.336521340334217e-07, "loss": 0.5966158509254456, "step": 15270 }, { "epoch": 18.737423312883436, "grad_norm": 0.2976471483707428, "learning_rate": 5.326211132258424e-07, "loss": 0.5017695426940918, "step": 15271 }, { "epoch": 18.738650306748465, "grad_norm": 0.25110357999801636, "learning_rate": 5.315910786285239e-07, "loss": 0.4725927710533142, "step": 15272 }, { "epoch": 18.739877300613497, "grad_norm": 0.28125351667404175, "learning_rate": 5.3056203028298e-07, "loss": 0.6430981159210205, "step": 15273 }, { "epoch": 18.74110429447853, "grad_norm": 0.2614331841468811, "learning_rate": 5.295339682306943e-07, "loss": 0.4597034454345703, "step": 15274 }, { "epoch": 18.742331288343557, "grad_norm": 0.3079015016555786, "learning_rate": 5.285068925131031e-07, "loss": 0.6254681348800659, "step": 15275 }, { "epoch": 18.74355828220859, "grad_norm": 0.2647608518600464, "learning_rate": 5.274808031716039e-07, "loss": 0.5448773503303528, "step": 15276 }, { "epoch": 18.74478527607362, "grad_norm": 0.24666734039783478, "learning_rate": 5.264557002475523e-07, "loss": 0.5429290533065796, "step": 15277 }, { "epoch": 18.74601226993865, "grad_norm": 0.2802443206310272, "learning_rate": 5.254315837822738e-07, "loss": 0.5163336992263794, "step": 15278 }, { "epoch": 18.74723926380368, "grad_norm": 0.29850152134895325, "learning_rate": 5.244084538170435e-07, "loss": 0.5023835897445679, "step": 15279 }, { "epoch": 18.74846625766871, "grad_norm": 0.3194597363471985, "learning_rate": 5.233863103931036e-07, "loss": 0.5030274391174316, "step": 15280 }, { "epoch": 18.749693251533742, "grad_norm": 0.310451865196228, "learning_rate": 5.223651535516488e-07, "loss": 0.6851617097854614, "step": 15281 }, { "epoch": 18.750920245398774, "grad_norm": 0.28196021914482117, "learning_rate": 5.213449833338463e-07, "loss": 0.37932470440864563, "step": 15282 }, { "epoch": 18.752147239263802, "grad_norm": 0.26480937004089355, "learning_rate": 5.203257997808076e-07, "loss": 0.514933168888092, "step": 15283 }, { "epoch": 18.753374233128834, "grad_norm": 0.25607478618621826, "learning_rate": 5.193076029336191e-07, "loss": 0.49566972255706787, "step": 15284 }, { "epoch": 18.754601226993866, "grad_norm": 0.23202572762966156, "learning_rate": 5.182903928333233e-07, "loss": 0.43272995948791504, "step": 15285 }, { "epoch": 18.755828220858895, "grad_norm": 0.3067319691181183, "learning_rate": 5.172741695209149e-07, "loss": 0.6113895773887634, "step": 15286 }, { "epoch": 18.757055214723927, "grad_norm": 0.27538979053497314, "learning_rate": 5.162589330373585e-07, "loss": 0.46863114833831787, "step": 15287 }, { "epoch": 18.758282208588955, "grad_norm": 0.28605467081069946, "learning_rate": 5.152446834235741e-07, "loss": 0.4998927712440491, "step": 15288 }, { "epoch": 18.759509202453987, "grad_norm": 0.2718774974346161, "learning_rate": 5.14231420720443e-07, "loss": 0.6954993605613708, "step": 15289 }, { "epoch": 18.76073619631902, "grad_norm": 0.2970033884048462, "learning_rate": 5.1321914496881e-07, "loss": 0.5219913125038147, "step": 15290 }, { "epoch": 18.761963190184048, "grad_norm": 0.24057511985301971, "learning_rate": 5.122078562094734e-07, "loss": 0.5302784442901611, "step": 15291 }, { "epoch": 18.76319018404908, "grad_norm": 0.23735012114048004, "learning_rate": 5.111975544831948e-07, "loss": 0.5459224581718445, "step": 15292 }, { "epoch": 18.764417177914112, "grad_norm": 0.23699013888835907, "learning_rate": 5.101882398307029e-07, "loss": 0.41781097650527954, "step": 15293 }, { "epoch": 18.76564417177914, "grad_norm": 0.2731378674507141, "learning_rate": 5.091799122926733e-07, "loss": 0.47894155979156494, "step": 15294 }, { "epoch": 18.766871165644172, "grad_norm": 0.2660670578479767, "learning_rate": 5.081725719097541e-07, "loss": 0.5509322881698608, "step": 15295 }, { "epoch": 18.7680981595092, "grad_norm": 0.26366573572158813, "learning_rate": 5.071662187225407e-07, "loss": 0.49648618698120117, "step": 15296 }, { "epoch": 18.769325153374233, "grad_norm": 0.2419794499874115, "learning_rate": 5.06160852771606e-07, "loss": 0.4754983186721802, "step": 15297 }, { "epoch": 18.770552147239265, "grad_norm": 0.2884845435619354, "learning_rate": 5.051564740974651e-07, "loss": 0.5286359786987305, "step": 15298 }, { "epoch": 18.771779141104293, "grad_norm": 0.26142531633377075, "learning_rate": 5.041530827406076e-07, "loss": 0.6019649505615234, "step": 15299 }, { "epoch": 18.773006134969325, "grad_norm": 0.3117089867591858, "learning_rate": 5.031506787414764e-07, "loss": 0.7385158538818359, "step": 15300 }, { "epoch": 18.774233128834357, "grad_norm": 0.23346325755119324, "learning_rate": 5.021492621404694e-07, "loss": 0.5686599016189575, "step": 15301 }, { "epoch": 18.775460122699386, "grad_norm": 0.30095958709716797, "learning_rate": 5.011488329779602e-07, "loss": 0.7451412677764893, "step": 15302 }, { "epoch": 18.776687116564418, "grad_norm": 0.26715636253356934, "learning_rate": 5.001493912942662e-07, "loss": 0.7142741680145264, "step": 15303 }, { "epoch": 18.77791411042945, "grad_norm": 0.2585250735282898, "learning_rate": 4.991509371296748e-07, "loss": 0.6067298650741577, "step": 15304 }, { "epoch": 18.779141104294478, "grad_norm": 0.22537195682525635, "learning_rate": 4.981534705244317e-07, "loss": 0.34548866748809814, "step": 15305 }, { "epoch": 18.78036809815951, "grad_norm": 0.2498675286769867, "learning_rate": 4.971569915187379e-07, "loss": 0.5628390312194824, "step": 15306 }, { "epoch": 18.78159509202454, "grad_norm": 0.28589847683906555, "learning_rate": 4.961615001527642e-07, "loss": 0.6326263546943665, "step": 15307 }, { "epoch": 18.78282208588957, "grad_norm": 0.2657351791858673, "learning_rate": 4.951669964666312e-07, "loss": 0.4277014136314392, "step": 15308 }, { "epoch": 18.784049079754602, "grad_norm": 0.2827761471271515, "learning_rate": 4.941734805004289e-07, "loss": 0.8661507368087769, "step": 15309 }, { "epoch": 18.78527607361963, "grad_norm": 0.29832637310028076, "learning_rate": 4.931809522942005e-07, "loss": 0.698661208152771, "step": 15310 }, { "epoch": 18.786503067484663, "grad_norm": 0.25398361682891846, "learning_rate": 4.9218941188795e-07, "loss": 0.5759528279304504, "step": 15311 }, { "epoch": 18.787730061349695, "grad_norm": 0.24753601849079132, "learning_rate": 4.911988593216455e-07, "loss": 0.6113766431808472, "step": 15312 }, { "epoch": 18.788957055214723, "grad_norm": 0.2627313435077667, "learning_rate": 4.902092946352188e-07, "loss": 0.8472735285758972, "step": 15313 }, { "epoch": 18.790184049079755, "grad_norm": 0.24352765083312988, "learning_rate": 4.892207178685465e-07, "loss": 0.5096030235290527, "step": 15314 }, { "epoch": 18.791411042944784, "grad_norm": 0.25554129481315613, "learning_rate": 4.8823312906148e-07, "loss": 0.48963814973831177, "step": 15315 }, { "epoch": 18.792638036809816, "grad_norm": 0.2661280632019043, "learning_rate": 4.872465282538291e-07, "loss": 0.7096434235572815, "step": 15316 }, { "epoch": 18.793865030674848, "grad_norm": 0.2772790193557739, "learning_rate": 4.862609154853564e-07, "loss": 0.3714704215526581, "step": 15317 }, { "epoch": 18.795092024539876, "grad_norm": 0.2562744617462158, "learning_rate": 4.852762907957941e-07, "loss": 0.5489447116851807, "step": 15318 }, { "epoch": 18.79631901840491, "grad_norm": 0.2723841071128845, "learning_rate": 4.842926542248216e-07, "loss": 0.7756527066230774, "step": 15319 }, { "epoch": 18.79754601226994, "grad_norm": 0.2623818516731262, "learning_rate": 4.833100058120932e-07, "loss": 0.6893080472946167, "step": 15320 }, { "epoch": 18.79877300613497, "grad_norm": 0.29040658473968506, "learning_rate": 4.823283455972133e-07, "loss": 0.6836308240890503, "step": 15321 }, { "epoch": 18.8, "grad_norm": 0.2843216359615326, "learning_rate": 4.813476736197531e-07, "loss": 0.4764214754104614, "step": 15322 }, { "epoch": 18.80122699386503, "grad_norm": 0.2815638482570648, "learning_rate": 4.803679899192392e-07, "loss": 0.6921877861022949, "step": 15323 }, { "epoch": 18.80245398773006, "grad_norm": 0.27609118819236755, "learning_rate": 4.793892945351597e-07, "loss": 0.45421868562698364, "step": 15324 }, { "epoch": 18.803680981595093, "grad_norm": 0.2480902224779129, "learning_rate": 4.784115875069606e-07, "loss": 0.6213089227676392, "step": 15325 }, { "epoch": 18.80490797546012, "grad_norm": 0.3058662414550781, "learning_rate": 4.774348688740548e-07, "loss": 0.6734089851379395, "step": 15326 }, { "epoch": 18.806134969325154, "grad_norm": 0.2818622291088104, "learning_rate": 4.764591386758055e-07, "loss": 0.7876219749450684, "step": 15327 }, { "epoch": 18.807361963190186, "grad_norm": 0.2581932842731476, "learning_rate": 4.754843969515477e-07, "loss": 0.5179560780525208, "step": 15328 }, { "epoch": 18.808588957055214, "grad_norm": 0.25926294922828674, "learning_rate": 4.745106437405694e-07, "loss": 0.6030786037445068, "step": 15329 }, { "epoch": 18.809815950920246, "grad_norm": 0.3052835166454315, "learning_rate": 4.735378790821143e-07, "loss": 0.8870722055435181, "step": 15330 }, { "epoch": 18.811042944785274, "grad_norm": 0.2827976942062378, "learning_rate": 4.7256610301539827e-07, "loss": 0.5485289692878723, "step": 15331 }, { "epoch": 18.812269938650306, "grad_norm": 0.2819404602050781, "learning_rate": 4.715953155795871e-07, "loss": 0.7439137697219849, "step": 15332 }, { "epoch": 18.81349693251534, "grad_norm": 0.22877560555934906, "learning_rate": 4.7062551681381074e-07, "loss": 0.34540295600891113, "step": 15333 }, { "epoch": 18.814723926380367, "grad_norm": 0.2862103283405304, "learning_rate": 4.696567067571628e-07, "loss": 0.6839549541473389, "step": 15334 }, { "epoch": 18.8159509202454, "grad_norm": 0.25082194805145264, "learning_rate": 4.6868888544868704e-07, "loss": 0.6146738529205322, "step": 15335 }, { "epoch": 18.81717791411043, "grad_norm": 0.21773378551006317, "learning_rate": 4.677220529273968e-07, "loss": 0.43376481533050537, "step": 15336 }, { "epoch": 18.81840490797546, "grad_norm": 0.27115803956985474, "learning_rate": 4.6675620923226357e-07, "loss": 0.4232448637485504, "step": 15337 }, { "epoch": 18.81963190184049, "grad_norm": 0.2727295756340027, "learning_rate": 4.6579135440221744e-07, "loss": 0.6450777053833008, "step": 15338 }, { "epoch": 18.82085889570552, "grad_norm": 0.2559374272823334, "learning_rate": 4.6482748847614666e-07, "loss": 0.5385802388191223, "step": 15339 }, { "epoch": 18.822085889570552, "grad_norm": 0.28574639558792114, "learning_rate": 4.638646114929035e-07, "loss": 0.6534530520439148, "step": 15340 }, { "epoch": 18.823312883435584, "grad_norm": 0.26913225650787354, "learning_rate": 4.629027234912986e-07, "loss": 0.5204800367355347, "step": 15341 }, { "epoch": 18.824539877300612, "grad_norm": 0.24075192213058472, "learning_rate": 4.619418245101037e-07, "loss": 0.5587427020072937, "step": 15342 }, { "epoch": 18.825766871165644, "grad_norm": 0.26131388545036316, "learning_rate": 4.609819145880517e-07, "loss": 0.6891093850135803, "step": 15343 }, { "epoch": 18.826993865030676, "grad_norm": 0.2937684655189514, "learning_rate": 4.600229937638284e-07, "loss": 0.7604595422744751, "step": 15344 }, { "epoch": 18.828220858895705, "grad_norm": 0.2711790204048157, "learning_rate": 4.5906506207608614e-07, "loss": 0.6121951341629028, "step": 15345 }, { "epoch": 18.829447852760737, "grad_norm": 0.25094518065452576, "learning_rate": 4.5810811956344126e-07, "loss": 0.5073522329330444, "step": 15346 }, { "epoch": 18.830674846625765, "grad_norm": 0.2741771340370178, "learning_rate": 4.571521662644601e-07, "loss": 0.6544768214225769, "step": 15347 }, { "epoch": 18.831901840490797, "grad_norm": 0.24836848676204681, "learning_rate": 4.561972022176786e-07, "loss": 0.4796122908592224, "step": 15348 }, { "epoch": 18.83312883435583, "grad_norm": 0.28345876932144165, "learning_rate": 4.552432274615853e-07, "loss": 0.6765681505203247, "step": 15349 }, { "epoch": 18.834355828220858, "grad_norm": 0.2971680164337158, "learning_rate": 4.5429024203463566e-07, "loss": 0.3718155026435852, "step": 15350 }, { "epoch": 18.83558282208589, "grad_norm": 0.5132148265838623, "learning_rate": 4.533382459752378e-07, "loss": 0.5887055993080139, "step": 15351 }, { "epoch": 18.83680981595092, "grad_norm": 0.2500431537628174, "learning_rate": 4.523872393217665e-07, "loss": 0.656025230884552, "step": 15352 }, { "epoch": 18.83803680981595, "grad_norm": 0.2625158131122589, "learning_rate": 4.514372221125551e-07, "loss": 0.5767805576324463, "step": 15353 }, { "epoch": 18.839263803680982, "grad_norm": 0.2744707763195038, "learning_rate": 4.504881943858924e-07, "loss": 0.696954607963562, "step": 15354 }, { "epoch": 18.84049079754601, "grad_norm": 0.2938406467437744, "learning_rate": 4.4954015618003386e-07, "loss": 0.619251549243927, "step": 15355 }, { "epoch": 18.841717791411043, "grad_norm": 0.248630091547966, "learning_rate": 4.485931075331934e-07, "loss": 0.6851462125778198, "step": 15356 }, { "epoch": 18.842944785276075, "grad_norm": 0.27372094988822937, "learning_rate": 4.4764704848354046e-07, "loss": 0.6563066244125366, "step": 15357 }, { "epoch": 18.844171779141103, "grad_norm": 0.26262858510017395, "learning_rate": 4.467019790692084e-07, "loss": 0.693672239780426, "step": 15358 }, { "epoch": 18.845398773006135, "grad_norm": 0.26992329955101013, "learning_rate": 4.457578993282918e-07, "loss": 0.586786687374115, "step": 15359 }, { "epoch": 18.846625766871167, "grad_norm": 0.26409637928009033, "learning_rate": 4.4481480929884355e-07, "loss": 0.6994605660438538, "step": 15360 }, { "epoch": 18.847852760736195, "grad_norm": 0.29597005248069763, "learning_rate": 4.4387270901887766e-07, "loss": 0.6220334768295288, "step": 15361 }, { "epoch": 18.849079754601227, "grad_norm": 0.2887323498725891, "learning_rate": 4.429315985263666e-07, "loss": 0.5639554262161255, "step": 15362 }, { "epoch": 18.85030674846626, "grad_norm": 0.2765934467315674, "learning_rate": 4.419914778592438e-07, "loss": 0.46188828349113464, "step": 15363 }, { "epoch": 18.851533742331288, "grad_norm": 0.23929435014724731, "learning_rate": 4.4105234705540123e-07, "loss": 0.5312220454216003, "step": 15364 }, { "epoch": 18.85276073619632, "grad_norm": 0.28333696722984314, "learning_rate": 4.4011420615269473e-07, "loss": 0.425798237323761, "step": 15365 }, { "epoch": 18.85398773006135, "grad_norm": 0.24437132477760315, "learning_rate": 4.391770551889385e-07, "loss": 0.5200620889663696, "step": 15366 }, { "epoch": 18.85521472392638, "grad_norm": 0.2677897810935974, "learning_rate": 4.382408942019078e-07, "loss": 0.680181622505188, "step": 15367 }, { "epoch": 18.856441717791412, "grad_norm": 0.27694380283355713, "learning_rate": 4.3730572322933093e-07, "loss": 0.6106482148170471, "step": 15368 }, { "epoch": 18.85766871165644, "grad_norm": 0.24693812429904938, "learning_rate": 4.363715423089054e-07, "loss": 0.5018847584724426, "step": 15369 }, { "epoch": 18.858895705521473, "grad_norm": 0.2784872055053711, "learning_rate": 4.3543835147828725e-07, "loss": 0.5582388639450073, "step": 15370 }, { "epoch": 18.860122699386505, "grad_norm": 0.2811260223388672, "learning_rate": 4.345061507750853e-07, "loss": 0.5976800918579102, "step": 15371 }, { "epoch": 18.861349693251533, "grad_norm": 0.26563286781311035, "learning_rate": 4.3357494023688326e-07, "loss": 0.5696654319763184, "step": 15372 }, { "epoch": 18.862576687116565, "grad_norm": 0.2658173143863678, "learning_rate": 4.326447199012068e-07, "loss": 0.5610763430595398, "step": 15373 }, { "epoch": 18.863803680981594, "grad_norm": 0.24842970073223114, "learning_rate": 4.317154898055509e-07, "loss": 0.6347891092300415, "step": 15374 }, { "epoch": 18.865030674846626, "grad_norm": 0.24308282136917114, "learning_rate": 4.3078724998737443e-07, "loss": 0.5663868188858032, "step": 15375 }, { "epoch": 18.866257668711658, "grad_norm": 0.22023813426494598, "learning_rate": 4.2986000048409194e-07, "loss": 0.37273138761520386, "step": 15376 }, { "epoch": 18.867484662576686, "grad_norm": 0.28790926933288574, "learning_rate": 4.289337413330763e-07, "loss": 0.52947598695755, "step": 15377 }, { "epoch": 18.868711656441718, "grad_norm": 0.25959306955337524, "learning_rate": 4.280084725716615e-07, "loss": 0.5756335258483887, "step": 15378 }, { "epoch": 18.86993865030675, "grad_norm": 0.2888113558292389, "learning_rate": 4.2708419423714006e-07, "loss": 0.64225172996521, "step": 15379 }, { "epoch": 18.87116564417178, "grad_norm": 0.30166149139404297, "learning_rate": 4.2616090636677374e-07, "loss": 0.4458548426628113, "step": 15380 }, { "epoch": 18.87239263803681, "grad_norm": 0.2879417836666107, "learning_rate": 4.252386089977772e-07, "loss": 0.6099374890327454, "step": 15381 }, { "epoch": 18.87361963190184, "grad_norm": 0.2846284508705139, "learning_rate": 4.243173021673208e-07, "loss": 0.7146259546279907, "step": 15382 }, { "epoch": 18.87484662576687, "grad_norm": 0.2766912579536438, "learning_rate": 4.233969859125414e-07, "loss": 0.6318878531455994, "step": 15383 }, { "epoch": 18.876073619631903, "grad_norm": 0.2662307024002075, "learning_rate": 4.224776602705371e-07, "loss": 0.6410751938819885, "step": 15384 }, { "epoch": 18.87730061349693, "grad_norm": 0.28768855333328247, "learning_rate": 4.2155932527835897e-07, "loss": 0.6313656568527222, "step": 15385 }, { "epoch": 18.878527607361963, "grad_norm": 0.2634570896625519, "learning_rate": 4.206419809730244e-07, "loss": 0.5571842193603516, "step": 15386 }, { "epoch": 18.879754601226995, "grad_norm": 0.2558663487434387, "learning_rate": 4.1972562739150957e-07, "loss": 0.6955661177635193, "step": 15387 }, { "epoch": 18.880981595092024, "grad_norm": 0.2200579047203064, "learning_rate": 4.188102645707487e-07, "loss": 0.41217517852783203, "step": 15388 }, { "epoch": 18.882208588957056, "grad_norm": 0.2848440408706665, "learning_rate": 4.178958925476401e-07, "loss": 0.6014900207519531, "step": 15389 }, { "epoch": 18.883435582822084, "grad_norm": 0.22961875796318054, "learning_rate": 4.1698251135903754e-07, "loss": 0.24417629837989807, "step": 15390 }, { "epoch": 18.884662576687116, "grad_norm": 0.25900471210479736, "learning_rate": 4.1607012104175614e-07, "loss": 0.4764086902141571, "step": 15391 }, { "epoch": 18.88588957055215, "grad_norm": 0.2680763602256775, "learning_rate": 4.1515872163257197e-07, "loss": 0.7528952956199646, "step": 15392 }, { "epoch": 18.887116564417177, "grad_norm": 0.25390854477882385, "learning_rate": 4.1424831316822235e-07, "loss": 0.48829060792922974, "step": 15393 }, { "epoch": 18.88834355828221, "grad_norm": 0.2617965340614319, "learning_rate": 4.1333889568540284e-07, "loss": 0.566694974899292, "step": 15394 }, { "epoch": 18.88957055214724, "grad_norm": 0.24462890625, "learning_rate": 4.1243046922076755e-07, "loss": 0.49365583062171936, "step": 15395 }, { "epoch": 18.89079754601227, "grad_norm": 0.2941041886806488, "learning_rate": 4.1152303381093713e-07, "loss": 0.7954063415527344, "step": 15396 }, { "epoch": 18.8920245398773, "grad_norm": 0.26340603828430176, "learning_rate": 4.106165894924824e-07, "loss": 0.6104364395141602, "step": 15397 }, { "epoch": 18.89325153374233, "grad_norm": 0.3128635585308075, "learning_rate": 4.0971113630194345e-07, "loss": 0.6848822832107544, "step": 15398 }, { "epoch": 18.89447852760736, "grad_norm": 0.3111709654331207, "learning_rate": 4.0880667427581063e-07, "loss": 0.5258172750473022, "step": 15399 }, { "epoch": 18.895705521472394, "grad_norm": 0.2768288850784302, "learning_rate": 4.0790320345054923e-07, "loss": 0.5429795384407043, "step": 15400 }, { "epoch": 18.896932515337422, "grad_norm": 0.24435095489025116, "learning_rate": 4.070007238625689e-07, "loss": 0.4184759855270386, "step": 15401 }, { "epoch": 18.898159509202454, "grad_norm": 0.26221925020217896, "learning_rate": 4.0609923554824625e-07, "loss": 0.5773870944976807, "step": 15402 }, { "epoch": 18.899386503067486, "grad_norm": 0.2912936210632324, "learning_rate": 4.0519873854392155e-07, "loss": 0.8390700221061707, "step": 15403 }, { "epoch": 18.900613496932515, "grad_norm": 0.30612891912460327, "learning_rate": 4.04299232885888e-07, "loss": 0.74934983253479, "step": 15404 }, { "epoch": 18.901840490797547, "grad_norm": 0.276533842086792, "learning_rate": 4.034007186104055e-07, "loss": 0.7439364194869995, "step": 15405 }, { "epoch": 18.903067484662575, "grad_norm": 0.2950587570667267, "learning_rate": 4.025031957536868e-07, "loss": 0.664257287979126, "step": 15406 }, { "epoch": 18.904294478527607, "grad_norm": 0.22966299951076508, "learning_rate": 4.016066643519112e-07, "loss": 0.46799468994140625, "step": 15407 }, { "epoch": 18.90552147239264, "grad_norm": 0.2641526758670807, "learning_rate": 4.0071112444121374e-07, "loss": 0.718100905418396, "step": 15408 }, { "epoch": 18.906748466257667, "grad_norm": 0.24755476415157318, "learning_rate": 3.998165760576905e-07, "loss": 0.47016048431396484, "step": 15409 }, { "epoch": 18.9079754601227, "grad_norm": 0.24538715183734894, "learning_rate": 3.9892301923739884e-07, "loss": 0.39219364523887634, "step": 15410 }, { "epoch": 18.90920245398773, "grad_norm": 0.2751717269420624, "learning_rate": 3.980304540163571e-07, "loss": 0.5266256332397461, "step": 15411 }, { "epoch": 18.91042944785276, "grad_norm": 0.27194634079933167, "learning_rate": 3.9713888043053926e-07, "loss": 0.7351025342941284, "step": 15412 }, { "epoch": 18.911656441717792, "grad_norm": 0.2860781252384186, "learning_rate": 3.962482985158861e-07, "loss": 0.6614590287208557, "step": 15413 }, { "epoch": 18.91288343558282, "grad_norm": 0.2399091124534607, "learning_rate": 3.9535870830828827e-07, "loss": 0.4663470685482025, "step": 15414 }, { "epoch": 18.914110429447852, "grad_norm": 0.25810569524765015, "learning_rate": 3.9447010984361155e-07, "loss": 0.575016975402832, "step": 15415 }, { "epoch": 18.915337423312884, "grad_norm": 0.4573909640312195, "learning_rate": 3.935825031576634e-07, "loss": 0.5934346914291382, "step": 15416 }, { "epoch": 18.916564417177913, "grad_norm": 0.2702994644641876, "learning_rate": 3.926958882862264e-07, "loss": 0.5717799067497253, "step": 15417 }, { "epoch": 18.917791411042945, "grad_norm": 0.2768639922142029, "learning_rate": 3.918102652650329e-07, "loss": 0.5957989692687988, "step": 15418 }, { "epoch": 18.919018404907977, "grad_norm": 0.2596666216850281, "learning_rate": 3.909256341297851e-07, "loss": 0.5739258527755737, "step": 15419 }, { "epoch": 18.920245398773005, "grad_norm": 0.27424895763397217, "learning_rate": 3.9004199491614046e-07, "loss": 0.4615115225315094, "step": 15420 }, { "epoch": 18.921472392638037, "grad_norm": 0.4937061369419098, "learning_rate": 3.891593476597094e-07, "loss": 0.7324157357215881, "step": 15421 }, { "epoch": 18.92269938650307, "grad_norm": 0.25138941407203674, "learning_rate": 3.882776923960746e-07, "loss": 0.5318384170532227, "step": 15422 }, { "epoch": 18.923926380368098, "grad_norm": 0.27020174264907837, "learning_rate": 3.8739702916077147e-07, "loss": 0.7053074836730957, "step": 15423 }, { "epoch": 18.92515337423313, "grad_norm": 0.2275635153055191, "learning_rate": 3.8651735798929387e-07, "loss": 0.4795500636100769, "step": 15424 }, { "epoch": 18.926380368098158, "grad_norm": 0.2655569016933441, "learning_rate": 3.8563867891710234e-07, "loss": 0.508735716342926, "step": 15425 }, { "epoch": 18.92760736196319, "grad_norm": 0.269887238740921, "learning_rate": 3.847609919796158e-07, "loss": 0.6576066017150879, "step": 15426 }, { "epoch": 18.928834355828222, "grad_norm": 0.28394457697868347, "learning_rate": 3.838842972122059e-07, "loss": 0.6434745788574219, "step": 15427 }, { "epoch": 18.93006134969325, "grad_norm": 0.25400447845458984, "learning_rate": 3.8300859465021655e-07, "loss": 0.4900987148284912, "step": 15428 }, { "epoch": 18.931288343558283, "grad_norm": 0.2823043763637543, "learning_rate": 3.8213388432893625e-07, "loss": 0.6874721050262451, "step": 15429 }, { "epoch": 18.93251533742331, "grad_norm": 0.2400689274072647, "learning_rate": 3.8126016628363124e-07, "loss": 0.5242182016372681, "step": 15430 }, { "epoch": 18.933742331288343, "grad_norm": 0.18596012890338898, "learning_rate": 3.8038744054951227e-07, "loss": 0.21262076497077942, "step": 15431 }, { "epoch": 18.934969325153375, "grad_norm": 0.28190475702285767, "learning_rate": 3.7951570716175666e-07, "loss": 0.6376181840896606, "step": 15432 }, { "epoch": 18.936196319018403, "grad_norm": 0.24646449089050293, "learning_rate": 3.786449661555058e-07, "loss": 0.5473042726516724, "step": 15433 }, { "epoch": 18.937423312883435, "grad_norm": 0.27783259749412537, "learning_rate": 3.777752175658511e-07, "loss": 0.5992516279220581, "step": 15434 }, { "epoch": 18.938650306748468, "grad_norm": 0.2646479308605194, "learning_rate": 3.769064614278561e-07, "loss": 0.5966908931732178, "step": 15435 }, { "epoch": 18.939877300613496, "grad_norm": 0.2635233700275421, "learning_rate": 3.7603869777653176e-07, "loss": 0.4744076430797577, "step": 15436 }, { "epoch": 18.941104294478528, "grad_norm": 0.2693670988082886, "learning_rate": 3.751719266468584e-07, "loss": 0.6023712754249573, "step": 15437 }, { "epoch": 18.94233128834356, "grad_norm": 0.26635730266571045, "learning_rate": 3.7430614807377194e-07, "loss": 0.5202741622924805, "step": 15438 }, { "epoch": 18.94355828220859, "grad_norm": 0.24820347130298615, "learning_rate": 3.734413620921695e-07, "loss": 0.6180890798568726, "step": 15439 }, { "epoch": 18.94478527607362, "grad_norm": 0.28504833579063416, "learning_rate": 3.725775687369121e-07, "loss": 0.614676833152771, "step": 15440 }, { "epoch": 18.94601226993865, "grad_norm": 0.26070815324783325, "learning_rate": 3.717147680428107e-07, "loss": 0.590442419052124, "step": 15441 }, { "epoch": 18.94723926380368, "grad_norm": 0.25560688972473145, "learning_rate": 3.708529600446459e-07, "loss": 0.4581637978553772, "step": 15442 }, { "epoch": 18.948466257668713, "grad_norm": 0.2697610557079315, "learning_rate": 3.699921447771509e-07, "loss": 0.36843010783195496, "step": 15443 }, { "epoch": 18.94969325153374, "grad_norm": 0.2566494345664978, "learning_rate": 3.691323222750287e-07, "loss": 0.6538009643554688, "step": 15444 }, { "epoch": 18.950920245398773, "grad_norm": 0.2448330670595169, "learning_rate": 3.68273492572932e-07, "loss": 0.4754892587661743, "step": 15445 }, { "epoch": 18.952147239263805, "grad_norm": 0.2714025676250458, "learning_rate": 3.6741565570547755e-07, "loss": 0.6130117177963257, "step": 15446 }, { "epoch": 18.953374233128834, "grad_norm": 0.2826921045780182, "learning_rate": 3.6655881170724604e-07, "loss": 0.7097179293632507, "step": 15447 }, { "epoch": 18.954601226993866, "grad_norm": 0.2563024163246155, "learning_rate": 3.65702960612771e-07, "loss": 0.5145770907402039, "step": 15448 }, { "epoch": 18.955828220858894, "grad_norm": 0.2801174521446228, "learning_rate": 3.6484810245655254e-07, "loss": 0.598757803440094, "step": 15449 }, { "epoch": 18.957055214723926, "grad_norm": 0.26695194840431213, "learning_rate": 3.6399423727304095e-07, "loss": 0.5180026292800903, "step": 15450 }, { "epoch": 18.958282208588958, "grad_norm": 0.2266158014535904, "learning_rate": 3.631413650966614e-07, "loss": 0.4648008346557617, "step": 15451 }, { "epoch": 18.959509202453987, "grad_norm": 0.2749348282814026, "learning_rate": 3.622894859617837e-07, "loss": 0.6426236629486084, "step": 15452 }, { "epoch": 18.96073619631902, "grad_norm": 0.27203667163848877, "learning_rate": 3.6143859990274975e-07, "loss": 0.5754863023757935, "step": 15453 }, { "epoch": 18.96196319018405, "grad_norm": 0.2672336995601654, "learning_rate": 3.6058870695385714e-07, "loss": 0.5096818208694458, "step": 15454 }, { "epoch": 18.96319018404908, "grad_norm": 0.280908465385437, "learning_rate": 3.5973980714935627e-07, "loss": 0.626288652420044, "step": 15455 }, { "epoch": 18.96441717791411, "grad_norm": 0.29029160737991333, "learning_rate": 3.58891900523467e-07, "loss": 0.760712742805481, "step": 15456 }, { "epoch": 18.96564417177914, "grad_norm": 0.24727702140808105, "learning_rate": 3.580449871103703e-07, "loss": 0.444758802652359, "step": 15457 }, { "epoch": 18.96687116564417, "grad_norm": 0.31069016456604004, "learning_rate": 3.5719906694419435e-07, "loss": 0.6165425181388855, "step": 15458 }, { "epoch": 18.968098159509204, "grad_norm": 0.27184250950813293, "learning_rate": 3.563541400590453e-07, "loss": 0.7477340698242188, "step": 15459 }, { "epoch": 18.969325153374232, "grad_norm": 0.2867529094219208, "learning_rate": 3.555102064889737e-07, "loss": 0.594327986240387, "step": 15460 }, { "epoch": 18.970552147239264, "grad_norm": 0.28202736377716064, "learning_rate": 3.5466726626799675e-07, "loss": 0.5788388252258301, "step": 15461 }, { "epoch": 18.971779141104296, "grad_norm": 0.22165407240390778, "learning_rate": 3.5382531943009003e-07, "loss": 0.38775181770324707, "step": 15462 }, { "epoch": 18.973006134969324, "grad_norm": 0.2565503418445587, "learning_rate": 3.5298436600919304e-07, "loss": 0.5452792644500732, "step": 15463 }, { "epoch": 18.974233128834356, "grad_norm": 0.2809341847896576, "learning_rate": 3.5214440603920097e-07, "loss": 0.7361241579055786, "step": 15464 }, { "epoch": 18.975460122699385, "grad_norm": 0.29712262749671936, "learning_rate": 3.5130543955397e-07, "loss": 0.7266162633895874, "step": 15465 }, { "epoch": 18.976687116564417, "grad_norm": 0.2793950140476227, "learning_rate": 3.5046746658731476e-07, "loss": 0.7207373976707458, "step": 15466 }, { "epoch": 18.97791411042945, "grad_norm": 0.2632257640361786, "learning_rate": 3.496304871730166e-07, "loss": 0.6137797236442566, "step": 15467 }, { "epoch": 18.979141104294477, "grad_norm": 0.2478286474943161, "learning_rate": 3.487945013448041e-07, "loss": 0.4830426573753357, "step": 15468 }, { "epoch": 18.98036809815951, "grad_norm": 0.26361966133117676, "learning_rate": 3.4795950913638074e-07, "loss": 0.6802637577056885, "step": 15469 }, { "epoch": 18.98159509202454, "grad_norm": 0.25063711404800415, "learning_rate": 3.471255105814003e-07, "loss": 0.507027804851532, "step": 15470 }, { "epoch": 18.98282208588957, "grad_norm": 0.2972632646560669, "learning_rate": 3.462925057134747e-07, "loss": 0.6897913813591003, "step": 15471 }, { "epoch": 18.9840490797546, "grad_norm": 0.2530694603919983, "learning_rate": 3.4546049456618823e-07, "loss": 0.5957586169242859, "step": 15472 }, { "epoch": 18.98527607361963, "grad_norm": 0.2942049503326416, "learning_rate": 3.446294771730696e-07, "loss": 0.5989881157875061, "step": 15473 }, { "epoch": 18.986503067484662, "grad_norm": 0.26282650232315063, "learning_rate": 3.437994535676198e-07, "loss": 0.4580558240413666, "step": 15474 }, { "epoch": 18.987730061349694, "grad_norm": 0.31557324528694153, "learning_rate": 3.429704237832898e-07, "loss": 0.5486705303192139, "step": 15475 }, { "epoch": 18.988957055214723, "grad_norm": 0.3002515435218811, "learning_rate": 3.421423878534974e-07, "loss": 0.7435135841369629, "step": 15476 }, { "epoch": 18.990184049079755, "grad_norm": 0.26372063159942627, "learning_rate": 3.413153458116214e-07, "loss": 0.521518886089325, "step": 15477 }, { "epoch": 18.991411042944787, "grad_norm": 0.24327510595321655, "learning_rate": 3.404892976909935e-07, "loss": 0.6549983024597168, "step": 15478 }, { "epoch": 18.992638036809815, "grad_norm": 0.2537882328033447, "learning_rate": 3.39664243524912e-07, "loss": 0.6205604672431946, "step": 15479 }, { "epoch": 18.993865030674847, "grad_norm": 0.22979171574115753, "learning_rate": 3.3884018334663093e-07, "loss": 0.5027973055839539, "step": 15480 }, { "epoch": 18.99509202453988, "grad_norm": 0.25540807843208313, "learning_rate": 3.3801711718936536e-07, "loss": 0.5741037726402283, "step": 15481 }, { "epoch": 18.996319018404908, "grad_norm": 0.24836954474449158, "learning_rate": 3.3719504508629154e-07, "loss": 0.5238505601882935, "step": 15482 }, { "epoch": 18.99754601226994, "grad_norm": 0.2540181577205658, "learning_rate": 3.363739670705468e-07, "loss": 0.5237050652503967, "step": 15483 }, { "epoch": 18.998773006134968, "grad_norm": 0.23090288043022156, "learning_rate": 3.355538831752242e-07, "loss": 0.553159236907959, "step": 15484 }, { "epoch": 19.0, "grad_norm": 0.31852981448173523, "learning_rate": 3.347347934333778e-07, "loss": 0.6536753177642822, "step": 15485 }, { "epoch": 19.001226993865032, "grad_norm": 0.25920507311820984, "learning_rate": 3.339166978780256e-07, "loss": 0.6572109460830688, "step": 15486 }, { "epoch": 19.00245398773006, "grad_norm": 0.2949686646461487, "learning_rate": 3.3309959654214127e-07, "loss": 0.689014196395874, "step": 15487 }, { "epoch": 19.003680981595092, "grad_norm": 0.28092673420906067, "learning_rate": 3.322834894586596e-07, "loss": 0.6477980613708496, "step": 15488 }, { "epoch": 19.004907975460124, "grad_norm": 0.30857640504837036, "learning_rate": 3.3146837666047646e-07, "loss": 0.5772422552108765, "step": 15489 }, { "epoch": 19.006134969325153, "grad_norm": 0.2557373344898224, "learning_rate": 3.306542581804434e-07, "loss": 0.6570507884025574, "step": 15490 }, { "epoch": 19.007361963190185, "grad_norm": 0.2586783766746521, "learning_rate": 3.2984113405138126e-07, "loss": 0.5694788694381714, "step": 15491 }, { "epoch": 19.008588957055213, "grad_norm": 0.23067456483840942, "learning_rate": 3.290290043060612e-07, "loss": 0.5652076601982117, "step": 15492 }, { "epoch": 19.009815950920245, "grad_norm": 0.2511611580848694, "learning_rate": 3.2821786897721805e-07, "loss": 0.37193605303764343, "step": 15493 }, { "epoch": 19.011042944785277, "grad_norm": 0.26309314370155334, "learning_rate": 3.2740772809754517e-07, "loss": 0.5918000340461731, "step": 15494 }, { "epoch": 19.012269938650306, "grad_norm": 0.2930505573749542, "learning_rate": 3.265985816996997e-07, "loss": 0.7556619644165039, "step": 15495 }, { "epoch": 19.013496932515338, "grad_norm": 0.28340160846710205, "learning_rate": 3.2579042981629447e-07, "loss": 0.6015145182609558, "step": 15496 }, { "epoch": 19.01472392638037, "grad_norm": 0.28173011541366577, "learning_rate": 3.2498327247990346e-07, "loss": 0.6100156307220459, "step": 15497 }, { "epoch": 19.0159509202454, "grad_norm": 0.2715858519077301, "learning_rate": 3.241771097230617e-07, "loss": 0.6466595530509949, "step": 15498 }, { "epoch": 19.01717791411043, "grad_norm": 0.2637956440448761, "learning_rate": 3.233719415782627e-07, "loss": 0.6553301811218262, "step": 15499 }, { "epoch": 19.01840490797546, "grad_norm": 0.2816595733165741, "learning_rate": 3.22567768077961e-07, "loss": 0.5865865349769592, "step": 15500 }, { "epoch": 19.01963190184049, "grad_norm": 0.2643696665763855, "learning_rate": 3.217645892545695e-07, "loss": 0.579836905002594, "step": 15501 }, { "epoch": 19.020858895705523, "grad_norm": 0.24225236475467682, "learning_rate": 3.2096240514046525e-07, "loss": 0.4839652180671692, "step": 15502 }, { "epoch": 19.02208588957055, "grad_norm": 0.26465895771980286, "learning_rate": 3.2016121576797787e-07, "loss": 0.4135524034500122, "step": 15503 }, { "epoch": 19.023312883435583, "grad_norm": 0.2830473482608795, "learning_rate": 3.193610211694037e-07, "loss": 0.6942921876907349, "step": 15504 }, { "epoch": 19.024539877300615, "grad_norm": 0.3100355267524719, "learning_rate": 3.1856182137699473e-07, "loss": 0.4198477864265442, "step": 15505 }, { "epoch": 19.025766871165644, "grad_norm": 0.26637738943099976, "learning_rate": 3.1776361642296415e-07, "loss": 0.8694722652435303, "step": 15506 }, { "epoch": 19.026993865030676, "grad_norm": 0.25420281291007996, "learning_rate": 3.1696640633948895e-07, "loss": 0.5303689241409302, "step": 15507 }, { "epoch": 19.028220858895704, "grad_norm": 0.28541603684425354, "learning_rate": 3.161701911586962e-07, "loss": 0.682641863822937, "step": 15508 }, { "epoch": 19.029447852760736, "grad_norm": 0.2848469913005829, "learning_rate": 3.153749709126852e-07, "loss": 0.5118822455406189, "step": 15509 }, { "epoch": 19.030674846625768, "grad_norm": 0.24559995532035828, "learning_rate": 3.1458074563350537e-07, "loss": 0.48838451504707336, "step": 15510 }, { "epoch": 19.031901840490796, "grad_norm": 0.2584964632987976, "learning_rate": 3.1378751535316996e-07, "loss": 0.5585110187530518, "step": 15511 }, { "epoch": 19.03312883435583, "grad_norm": 0.26058393716812134, "learning_rate": 3.129952801036534e-07, "loss": 0.5690903663635254, "step": 15512 }, { "epoch": 19.03435582822086, "grad_norm": 0.25363609194755554, "learning_rate": 3.1220403991688573e-07, "loss": 0.40863513946533203, "step": 15513 }, { "epoch": 19.03558282208589, "grad_norm": 0.26774558424949646, "learning_rate": 3.114137948247636e-07, "loss": 0.5699991583824158, "step": 15514 }, { "epoch": 19.03680981595092, "grad_norm": 0.2533966898918152, "learning_rate": 3.106245448591366e-07, "loss": 0.5713862180709839, "step": 15515 }, { "epoch": 19.03803680981595, "grad_norm": 0.277849406003952, "learning_rate": 3.098362900518209e-07, "loss": 0.5396018028259277, "step": 15516 }, { "epoch": 19.03926380368098, "grad_norm": 0.2900213599205017, "learning_rate": 3.0904903043458275e-07, "loss": 0.8321250677108765, "step": 15517 }, { "epoch": 19.040490797546013, "grad_norm": 0.2735212445259094, "learning_rate": 3.0826276603915517e-07, "loss": 0.607227087020874, "step": 15518 }, { "epoch": 19.041717791411042, "grad_norm": 0.2797504961490631, "learning_rate": 3.074774968972349e-07, "loss": 0.6368119716644287, "step": 15519 }, { "epoch": 19.042944785276074, "grad_norm": 0.26525843143463135, "learning_rate": 3.0669322304047176e-07, "loss": 0.6011868715286255, "step": 15520 }, { "epoch": 19.044171779141106, "grad_norm": 0.3154607117176056, "learning_rate": 3.059099445004793e-07, "loss": 0.6508238315582275, "step": 15521 }, { "epoch": 19.045398773006134, "grad_norm": 0.2825962007045746, "learning_rate": 3.051276613088239e-07, "loss": 0.6377282738685608, "step": 15522 }, { "epoch": 19.046625766871166, "grad_norm": 0.25757256150245667, "learning_rate": 3.0434637349704144e-07, "loss": 0.5409518480300903, "step": 15523 }, { "epoch": 19.047852760736195, "grad_norm": 0.2591853141784668, "learning_rate": 3.035660810966234e-07, "loss": 0.6383548974990845, "step": 15524 }, { "epoch": 19.049079754601227, "grad_norm": 0.27247244119644165, "learning_rate": 3.027867841390197e-07, "loss": 0.7271251678466797, "step": 15525 }, { "epoch": 19.05030674846626, "grad_norm": 0.23963193595409393, "learning_rate": 3.020084826556413e-07, "loss": 0.3522301912307739, "step": 15526 }, { "epoch": 19.051533742331287, "grad_norm": 0.24897794425487518, "learning_rate": 3.012311766778603e-07, "loss": 0.5510503053665161, "step": 15527 }, { "epoch": 19.05276073619632, "grad_norm": 0.2757461369037628, "learning_rate": 3.004548662370071e-07, "loss": 0.4541953206062317, "step": 15528 }, { "epoch": 19.05398773006135, "grad_norm": 0.28025543689727783, "learning_rate": 2.996795513643735e-07, "loss": 0.5072070360183716, "step": 15529 }, { "epoch": 19.05521472392638, "grad_norm": 0.25767865777015686, "learning_rate": 2.9890523209120944e-07, "loss": 0.5460700988769531, "step": 15530 }, { "epoch": 19.05644171779141, "grad_norm": 0.3000355064868927, "learning_rate": 2.9813190844872605e-07, "loss": 0.6726352572441101, "step": 15531 }, { "epoch": 19.05766871165644, "grad_norm": 0.2516064941883087, "learning_rate": 2.9735958046809563e-07, "loss": 0.3925473093986511, "step": 15532 }, { "epoch": 19.058895705521472, "grad_norm": 0.25730621814727783, "learning_rate": 2.965882481804433e-07, "loss": 0.3751104474067688, "step": 15533 }, { "epoch": 19.060122699386504, "grad_norm": 0.26689547300338745, "learning_rate": 2.9581791161686355e-07, "loss": 0.5218518972396851, "step": 15534 }, { "epoch": 19.061349693251532, "grad_norm": 0.2744821012020111, "learning_rate": 2.950485708084039e-07, "loss": 0.5624144077301025, "step": 15535 }, { "epoch": 19.062576687116565, "grad_norm": 0.2628687024116516, "learning_rate": 2.9428022578607827e-07, "loss": 0.614841103553772, "step": 15536 }, { "epoch": 19.063803680981597, "grad_norm": 0.2565420866012573, "learning_rate": 2.9351287658085093e-07, "loss": 0.5569916367530823, "step": 15537 }, { "epoch": 19.065030674846625, "grad_norm": 0.24239133298397064, "learning_rate": 2.9274652322365535e-07, "loss": 0.42598956823349, "step": 15538 }, { "epoch": 19.066257668711657, "grad_norm": 0.25568854808807373, "learning_rate": 2.9198116574538083e-07, "loss": 0.6859689354896545, "step": 15539 }, { "epoch": 19.067484662576685, "grad_norm": 0.26058533787727356, "learning_rate": 2.912168041768748e-07, "loss": 0.6602602005004883, "step": 15540 }, { "epoch": 19.068711656441717, "grad_norm": 0.23533086478710175, "learning_rate": 2.9045343854895156e-07, "loss": 0.4718015491962433, "step": 15541 }, { "epoch": 19.06993865030675, "grad_norm": 0.26327815651893616, "learning_rate": 2.896910688923726e-07, "loss": 0.4511849880218506, "step": 15542 }, { "epoch": 19.071165644171778, "grad_norm": 0.29071781039237976, "learning_rate": 2.889296952378717e-07, "loss": 0.6808133125305176, "step": 15543 }, { "epoch": 19.07239263803681, "grad_norm": 0.2691454291343689, "learning_rate": 2.881693176161354e-07, "loss": 0.693962574005127, "step": 15544 }, { "epoch": 19.073619631901842, "grad_norm": 0.22548571228981018, "learning_rate": 2.8740993605781416e-07, "loss": 0.4249667525291443, "step": 15545 }, { "epoch": 19.07484662576687, "grad_norm": 0.2482619732618332, "learning_rate": 2.866515505935169e-07, "loss": 0.6845868825912476, "step": 15546 }, { "epoch": 19.076073619631902, "grad_norm": 0.3004922568798065, "learning_rate": 2.8589416125381076e-07, "loss": 0.4892275333404541, "step": 15547 }, { "epoch": 19.07730061349693, "grad_norm": 0.2762199640274048, "learning_rate": 2.851377680692241e-07, "loss": 0.585484504699707, "step": 15548 }, { "epoch": 19.078527607361963, "grad_norm": 0.2855587899684906, "learning_rate": 2.843823710702437e-07, "loss": 0.4709779918193817, "step": 15549 }, { "epoch": 19.079754601226995, "grad_norm": 0.2726927697658539, "learning_rate": 2.8362797028732014e-07, "loss": 0.6216049790382385, "step": 15550 }, { "epoch": 19.080981595092023, "grad_norm": 0.3027653098106384, "learning_rate": 2.828745657508597e-07, "loss": 0.6423373818397522, "step": 15551 }, { "epoch": 19.082208588957055, "grad_norm": 0.24247443675994873, "learning_rate": 2.8212215749122975e-07, "loss": 0.6281909942626953, "step": 15552 }, { "epoch": 19.083435582822087, "grad_norm": 0.2667468190193176, "learning_rate": 2.8137074553875873e-07, "loss": 0.7013019919395447, "step": 15553 }, { "epoch": 19.084662576687116, "grad_norm": 0.24377913773059845, "learning_rate": 2.806203299237309e-07, "loss": 0.5487319827079773, "step": 15554 }, { "epoch": 19.085889570552148, "grad_norm": 0.25317683815956116, "learning_rate": 2.7987091067639694e-07, "loss": 0.6231718063354492, "step": 15555 }, { "epoch": 19.08711656441718, "grad_norm": 0.26848191022872925, "learning_rate": 2.7912248782696604e-07, "loss": 0.522138237953186, "step": 15556 }, { "epoch": 19.088343558282208, "grad_norm": 0.25168463587760925, "learning_rate": 2.7837506140559745e-07, "loss": 0.47587889432907104, "step": 15557 }, { "epoch": 19.08957055214724, "grad_norm": 0.31205639243125916, "learning_rate": 2.7762863144242256e-07, "loss": 0.621688961982727, "step": 15558 }, { "epoch": 19.09079754601227, "grad_norm": 0.2645886540412903, "learning_rate": 2.768831979675257e-07, "loss": 0.7117650508880615, "step": 15559 }, { "epoch": 19.0920245398773, "grad_norm": 0.23457367718219757, "learning_rate": 2.761387610109578e-07, "loss": 0.39615631103515625, "step": 15560 }, { "epoch": 19.093251533742333, "grad_norm": 0.25403016805648804, "learning_rate": 2.753953206027199e-07, "loss": 0.669093906879425, "step": 15561 }, { "epoch": 19.09447852760736, "grad_norm": 0.26722466945648193, "learning_rate": 2.746528767727796e-07, "loss": 0.6310396194458008, "step": 15562 }, { "epoch": 19.095705521472393, "grad_norm": 0.2863602936267853, "learning_rate": 2.7391142955106306e-07, "loss": 0.7216989994049072, "step": 15563 }, { "epoch": 19.096932515337425, "grad_norm": 0.2409089207649231, "learning_rate": 2.731709789674575e-07, "loss": 0.5503473281860352, "step": 15564 }, { "epoch": 19.098159509202453, "grad_norm": 0.2542863190174103, "learning_rate": 2.724315250518056e-07, "loss": 0.4675602912902832, "step": 15565 }, { "epoch": 19.099386503067485, "grad_norm": 0.3128001093864441, "learning_rate": 2.7169306783391413e-07, "loss": 0.5894114375114441, "step": 15566 }, { "epoch": 19.100613496932514, "grad_norm": 0.27291339635849, "learning_rate": 2.709556073435482e-07, "loss": 0.49833545088768005, "step": 15567 }, { "epoch": 19.101840490797546, "grad_norm": 0.2911783754825592, "learning_rate": 2.7021914361042844e-07, "loss": 0.8547276854515076, "step": 15568 }, { "epoch": 19.103067484662578, "grad_norm": 0.26530611515045166, "learning_rate": 2.694836766642478e-07, "loss": 0.5670586824417114, "step": 15569 }, { "epoch": 19.104294478527606, "grad_norm": 0.26550209522247314, "learning_rate": 2.687492065346464e-07, "loss": 0.6865995526313782, "step": 15570 }, { "epoch": 19.10552147239264, "grad_norm": 0.27784350514411926, "learning_rate": 2.6801573325122565e-07, "loss": 0.595111608505249, "step": 15571 }, { "epoch": 19.10674846625767, "grad_norm": 0.25161775946617126, "learning_rate": 2.6728325684355625e-07, "loss": 0.5138437747955322, "step": 15572 }, { "epoch": 19.1079754601227, "grad_norm": 0.2635716199874878, "learning_rate": 2.6655177734115634e-07, "loss": 0.48764345049858093, "step": 15573 }, { "epoch": 19.10920245398773, "grad_norm": 0.25207483768463135, "learning_rate": 2.6582129477351336e-07, "loss": 0.30982908606529236, "step": 15574 }, { "epoch": 19.11042944785276, "grad_norm": 0.29243355989456177, "learning_rate": 2.650918091700705e-07, "loss": 0.7353715300559998, "step": 15575 }, { "epoch": 19.11165644171779, "grad_norm": 0.2807004153728485, "learning_rate": 2.6436332056022917e-07, "loss": 0.580844521522522, "step": 15576 }, { "epoch": 19.112883435582823, "grad_norm": 0.25005725026130676, "learning_rate": 2.6363582897335484e-07, "loss": 0.4646947681903839, "step": 15577 }, { "epoch": 19.11411042944785, "grad_norm": 0.24425868690013885, "learning_rate": 2.629093344387684e-07, "loss": 0.7046242356300354, "step": 15578 }, { "epoch": 19.115337423312884, "grad_norm": 0.26282840967178345, "learning_rate": 2.6218383698575765e-07, "loss": 0.6420117616653442, "step": 15579 }, { "epoch": 19.116564417177916, "grad_norm": 0.272282212972641, "learning_rate": 2.6145933664355746e-07, "loss": 0.6844086647033691, "step": 15580 }, { "epoch": 19.117791411042944, "grad_norm": 0.2749355435371399, "learning_rate": 2.6073583344137787e-07, "loss": 0.5719791650772095, "step": 15581 }, { "epoch": 19.119018404907976, "grad_norm": 0.28105154633522034, "learning_rate": 2.600133274083788e-07, "loss": 0.7059277296066284, "step": 15582 }, { "epoch": 19.120245398773005, "grad_norm": 0.2757807970046997, "learning_rate": 2.5929181857368146e-07, "loss": 0.5308383703231812, "step": 15583 }, { "epoch": 19.121472392638037, "grad_norm": 0.2569369971752167, "learning_rate": 2.5857130696636813e-07, "loss": 0.5078203678131104, "step": 15584 }, { "epoch": 19.12269938650307, "grad_norm": 0.27016228437423706, "learning_rate": 2.5785179261547943e-07, "loss": 0.5279009342193604, "step": 15585 }, { "epoch": 19.123926380368097, "grad_norm": 0.2662789821624756, "learning_rate": 2.5713327555002e-07, "loss": 0.657353401184082, "step": 15586 }, { "epoch": 19.12515337423313, "grad_norm": 0.24857038259506226, "learning_rate": 2.564157557989472e-07, "loss": 0.6883531808853149, "step": 15587 }, { "epoch": 19.12638036809816, "grad_norm": 0.24477167427539825, "learning_rate": 2.556992333911851e-07, "loss": 0.4771386981010437, "step": 15588 }, { "epoch": 19.12760736196319, "grad_norm": 0.272159218788147, "learning_rate": 2.549837083556161e-07, "loss": 0.7366246581077576, "step": 15589 }, { "epoch": 19.12883435582822, "grad_norm": 0.2629441022872925, "learning_rate": 2.5426918072107566e-07, "loss": 0.47775810956954956, "step": 15590 }, { "epoch": 19.13006134969325, "grad_norm": 0.29105567932128906, "learning_rate": 2.5355565051636833e-07, "loss": 0.5888192653656006, "step": 15591 }, { "epoch": 19.131288343558282, "grad_norm": 0.2917846441268921, "learning_rate": 2.528431177702545e-07, "loss": 0.7695599794387817, "step": 15592 }, { "epoch": 19.132515337423314, "grad_norm": 0.2750089466571808, "learning_rate": 2.521315825114529e-07, "loss": 0.5887625217437744, "step": 15593 }, { "epoch": 19.133742331288342, "grad_norm": 0.23905760049819946, "learning_rate": 2.51421044768646e-07, "loss": 0.6818940043449402, "step": 15594 }, { "epoch": 19.134969325153374, "grad_norm": 0.2392745018005371, "learning_rate": 2.5071150457046944e-07, "loss": 0.4888817071914673, "step": 15595 }, { "epoch": 19.136196319018406, "grad_norm": 0.25896742939949036, "learning_rate": 2.500029619455252e-07, "loss": 0.6466255187988281, "step": 15596 }, { "epoch": 19.137423312883435, "grad_norm": 0.26890119910240173, "learning_rate": 2.492954169223738e-07, "loss": 0.4519672393798828, "step": 15597 }, { "epoch": 19.138650306748467, "grad_norm": 0.2772124707698822, "learning_rate": 2.4858886952953133e-07, "loss": 0.6026498079299927, "step": 15598 }, { "epoch": 19.139877300613495, "grad_norm": 0.24094004929065704, "learning_rate": 2.478833197954805e-07, "loss": 0.44715774059295654, "step": 15599 }, { "epoch": 19.141104294478527, "grad_norm": 0.21787197887897491, "learning_rate": 2.4717876774865425e-07, "loss": 0.46103155612945557, "step": 15600 }, { "epoch": 19.14233128834356, "grad_norm": 0.22210556268692017, "learning_rate": 2.464752134174547e-07, "loss": 0.46309924125671387, "step": 15601 }, { "epoch": 19.143558282208588, "grad_norm": 0.2457238733768463, "learning_rate": 2.4577265683024265e-07, "loss": 0.6431790590286255, "step": 15602 }, { "epoch": 19.14478527607362, "grad_norm": 0.31417316198349, "learning_rate": 2.4507109801533423e-07, "loss": 0.3598482310771942, "step": 15603 }, { "epoch": 19.14601226993865, "grad_norm": 0.2731918394565582, "learning_rate": 2.443705370010041e-07, "loss": 0.5284492373466492, "step": 15604 }, { "epoch": 19.14723926380368, "grad_norm": 0.2393893301486969, "learning_rate": 2.436709738154935e-07, "loss": 0.5362446308135986, "step": 15605 }, { "epoch": 19.148466257668712, "grad_norm": 0.2542176842689514, "learning_rate": 2.4297240848699943e-07, "loss": 0.6258782148361206, "step": 15606 }, { "epoch": 19.14969325153374, "grad_norm": 0.25948724150657654, "learning_rate": 2.4227484104367704e-07, "loss": 0.6996631622314453, "step": 15607 }, { "epoch": 19.150920245398773, "grad_norm": 0.32072097063064575, "learning_rate": 2.4157827151364554e-07, "loss": 0.6921621561050415, "step": 15608 }, { "epoch": 19.152147239263805, "grad_norm": 0.24242158234119415, "learning_rate": 2.408826999249797e-07, "loss": 0.2629132568836212, "step": 15609 }, { "epoch": 19.153374233128833, "grad_norm": 0.2632128596305847, "learning_rate": 2.4018812630571543e-07, "loss": 0.6644801497459412, "step": 15610 }, { "epoch": 19.154601226993865, "grad_norm": 0.3048565983772278, "learning_rate": 2.394945506838525e-07, "loss": 0.6476645469665527, "step": 15611 }, { "epoch": 19.155828220858897, "grad_norm": 0.25412169098854065, "learning_rate": 2.388019730873464e-07, "loss": 0.6835761070251465, "step": 15612 }, { "epoch": 19.157055214723925, "grad_norm": 0.2939128875732422, "learning_rate": 2.381103935441109e-07, "loss": 0.6096471548080444, "step": 15613 }, { "epoch": 19.158282208588957, "grad_norm": 0.24789486825466156, "learning_rate": 2.374198120820209e-07, "loss": 0.43319571018218994, "step": 15614 }, { "epoch": 19.15950920245399, "grad_norm": 0.23721913993358612, "learning_rate": 2.3673022872891527e-07, "loss": 0.5442913770675659, "step": 15615 }, { "epoch": 19.160736196319018, "grad_norm": 0.2714819610118866, "learning_rate": 2.360416435125856e-07, "loss": 0.6057541370391846, "step": 15616 }, { "epoch": 19.16196319018405, "grad_norm": 0.26407966017723083, "learning_rate": 2.3535405646078756e-07, "loss": 0.6631194353103638, "step": 15617 }, { "epoch": 19.16319018404908, "grad_norm": 0.2978915870189667, "learning_rate": 2.3466746760123782e-07, "loss": 0.6162807941436768, "step": 15618 }, { "epoch": 19.16441717791411, "grad_norm": 0.32170945405960083, "learning_rate": 2.3398187696160867e-07, "loss": 0.4455367922782898, "step": 15619 }, { "epoch": 19.165644171779142, "grad_norm": 0.25322800874710083, "learning_rate": 2.3329728456953638e-07, "loss": 0.661132276058197, "step": 15620 }, { "epoch": 19.16687116564417, "grad_norm": 0.2916284203529358, "learning_rate": 2.3261369045261273e-07, "loss": 0.5665102601051331, "step": 15621 }, { "epoch": 19.168098159509203, "grad_norm": 0.2513220012187958, "learning_rate": 2.3193109463839347e-07, "loss": 0.5695427656173706, "step": 15622 }, { "epoch": 19.169325153374235, "grad_norm": 0.26983973383903503, "learning_rate": 2.3124949715438992e-07, "loss": 0.6138904094696045, "step": 15623 }, { "epoch": 19.170552147239263, "grad_norm": 0.3260478377342224, "learning_rate": 2.3056889802807457e-07, "loss": 0.6232873201370239, "step": 15624 }, { "epoch": 19.171779141104295, "grad_norm": 0.2713378369808197, "learning_rate": 2.2988929728688659e-07, "loss": 0.5095798373222351, "step": 15625 }, { "epoch": 19.173006134969324, "grad_norm": 0.24961231648921967, "learning_rate": 2.2921069495820957e-07, "loss": 0.47833728790283203, "step": 15626 }, { "epoch": 19.174233128834356, "grad_norm": 0.2729978859424591, "learning_rate": 2.2853309106940502e-07, "loss": 0.637974739074707, "step": 15627 }, { "epoch": 19.175460122699388, "grad_norm": 0.2824903428554535, "learning_rate": 2.278564856477816e-07, "loss": 0.4462968111038208, "step": 15628 }, { "epoch": 19.176687116564416, "grad_norm": 0.2644289433956146, "learning_rate": 2.271808787206092e-07, "loss": 0.5875904560089111, "step": 15629 }, { "epoch": 19.177914110429448, "grad_norm": 0.2935873866081238, "learning_rate": 2.2650627031511884e-07, "loss": 0.7554188370704651, "step": 15630 }, { "epoch": 19.17914110429448, "grad_norm": 0.26379087567329407, "learning_rate": 2.2583266045850814e-07, "loss": 0.4860280156135559, "step": 15631 }, { "epoch": 19.18036809815951, "grad_norm": 0.27808013558387756, "learning_rate": 2.251600491779249e-07, "loss": 0.6582149267196655, "step": 15632 }, { "epoch": 19.18159509202454, "grad_norm": 0.2750902771949768, "learning_rate": 2.244884365004779e-07, "loss": 0.5358799695968628, "step": 15633 }, { "epoch": 19.18282208588957, "grad_norm": 0.20725314319133759, "learning_rate": 2.2381782245324001e-07, "loss": 0.32288414239883423, "step": 15634 }, { "epoch": 19.1840490797546, "grad_norm": 0.25065287947654724, "learning_rate": 2.2314820706324236e-07, "loss": 0.6896646022796631, "step": 15635 }, { "epoch": 19.185276073619633, "grad_norm": 0.2905101776123047, "learning_rate": 2.2247959035747447e-07, "loss": 0.5678698420524597, "step": 15636 }, { "epoch": 19.18650306748466, "grad_norm": 0.25215017795562744, "learning_rate": 2.2181197236288697e-07, "loss": 0.40838098526000977, "step": 15637 }, { "epoch": 19.187730061349694, "grad_norm": 0.26705026626586914, "learning_rate": 2.2114535310639173e-07, "loss": 0.5971882939338684, "step": 15638 }, { "epoch": 19.188957055214726, "grad_norm": 0.234080508351326, "learning_rate": 2.2047973261485334e-07, "loss": 0.4631671905517578, "step": 15639 }, { "epoch": 19.190184049079754, "grad_norm": 0.274194598197937, "learning_rate": 2.198151109151031e-07, "loss": 0.7547903060913086, "step": 15640 }, { "epoch": 19.191411042944786, "grad_norm": 0.25741973519325256, "learning_rate": 2.191514880339307e-07, "loss": 0.4803391098976135, "step": 15641 }, { "epoch": 19.192638036809814, "grad_norm": 0.308315247297287, "learning_rate": 2.1848886399808422e-07, "loss": 0.7040413022041321, "step": 15642 }, { "epoch": 19.193865030674846, "grad_norm": 0.2893327474594116, "learning_rate": 2.1782723883427282e-07, "loss": 0.8077352046966553, "step": 15643 }, { "epoch": 19.19509202453988, "grad_norm": 0.26443737745285034, "learning_rate": 2.171666125691668e-07, "loss": 0.7435310482978821, "step": 15644 }, { "epoch": 19.196319018404907, "grad_norm": 0.27335280179977417, "learning_rate": 2.1650698522938938e-07, "loss": 0.7214279174804688, "step": 15645 }, { "epoch": 19.19754601226994, "grad_norm": 0.2606821656227112, "learning_rate": 2.1584835684152759e-07, "loss": 0.6016335487365723, "step": 15646 }, { "epoch": 19.19877300613497, "grad_norm": 0.2935357987880707, "learning_rate": 2.1519072743213797e-07, "loss": 0.2848409414291382, "step": 15647 }, { "epoch": 19.2, "grad_norm": 0.2639932632446289, "learning_rate": 2.1453409702771877e-07, "loss": 0.6620244383811951, "step": 15648 }, { "epoch": 19.20122699386503, "grad_norm": 0.29775023460388184, "learning_rate": 2.1387846565474045e-07, "loss": 0.6931723952293396, "step": 15649 }, { "epoch": 19.20245398773006, "grad_norm": 0.25157430768013, "learning_rate": 2.1322383333962636e-07, "loss": 0.6132462024688721, "step": 15650 }, { "epoch": 19.20368098159509, "grad_norm": 0.26053595542907715, "learning_rate": 2.1257020010876926e-07, "loss": 0.6839142441749573, "step": 15651 }, { "epoch": 19.204907975460124, "grad_norm": 0.25885042548179626, "learning_rate": 2.119175659885092e-07, "loss": 0.644181489944458, "step": 15652 }, { "epoch": 19.206134969325152, "grad_norm": 0.2844583988189697, "learning_rate": 2.1126593100515567e-07, "loss": 0.6981903314590454, "step": 15653 }, { "epoch": 19.207361963190184, "grad_norm": 0.24492456018924713, "learning_rate": 2.1061529518497103e-07, "loss": 0.5627588629722595, "step": 15654 }, { "epoch": 19.208588957055216, "grad_norm": 0.2486869841814041, "learning_rate": 2.0996565855418427e-07, "loss": 0.6487361192703247, "step": 15655 }, { "epoch": 19.209815950920245, "grad_norm": 0.25746220350265503, "learning_rate": 2.093170211389772e-07, "loss": 0.49360519647598267, "step": 15656 }, { "epoch": 19.211042944785277, "grad_norm": 0.27797210216522217, "learning_rate": 2.0866938296549565e-07, "loss": 0.5869017243385315, "step": 15657 }, { "epoch": 19.212269938650305, "grad_norm": 0.2552661895751953, "learning_rate": 2.0802274405984646e-07, "loss": 0.5413323044776917, "step": 15658 }, { "epoch": 19.213496932515337, "grad_norm": 0.2646702229976654, "learning_rate": 2.0737710444808932e-07, "loss": 0.539040207862854, "step": 15659 }, { "epoch": 19.21472392638037, "grad_norm": 0.24074086546897888, "learning_rate": 2.0673246415624792e-07, "loss": 0.4676140546798706, "step": 15660 }, { "epoch": 19.215950920245398, "grad_norm": 0.24642446637153625, "learning_rate": 2.0608882321031254e-07, "loss": 0.5153641104698181, "step": 15661 }, { "epoch": 19.21717791411043, "grad_norm": 0.26997655630111694, "learning_rate": 2.0544618163622076e-07, "loss": 0.4566311240196228, "step": 15662 }, { "epoch": 19.21840490797546, "grad_norm": 0.27587464451789856, "learning_rate": 2.0480453945987688e-07, "loss": 0.4805257320404053, "step": 15663 }, { "epoch": 19.21963190184049, "grad_norm": 0.27641117572784424, "learning_rate": 2.0416389670714354e-07, "loss": 0.5234161615371704, "step": 15664 }, { "epoch": 19.220858895705522, "grad_norm": 0.3027830123901367, "learning_rate": 2.035242534038445e-07, "loss": 0.6665984392166138, "step": 15665 }, { "epoch": 19.22208588957055, "grad_norm": 0.2664552628993988, "learning_rate": 2.028856095757592e-07, "loss": 0.6065563559532166, "step": 15666 }, { "epoch": 19.223312883435582, "grad_norm": 0.2753150463104248, "learning_rate": 2.0224796524863366e-07, "loss": 0.6895867586135864, "step": 15667 }, { "epoch": 19.224539877300614, "grad_norm": 0.28075286746025085, "learning_rate": 2.0161132044816676e-07, "loss": 0.49532604217529297, "step": 15668 }, { "epoch": 19.225766871165643, "grad_norm": 0.2576037645339966, "learning_rate": 2.009756752000186e-07, "loss": 0.5182088017463684, "step": 15669 }, { "epoch": 19.226993865030675, "grad_norm": 0.2769133448600769, "learning_rate": 2.0034102952981305e-07, "loss": 0.6898492574691772, "step": 15670 }, { "epoch": 19.228220858895707, "grad_norm": 0.25994202494621277, "learning_rate": 1.9970738346313246e-07, "loss": 0.6196413040161133, "step": 15671 }, { "epoch": 19.229447852760735, "grad_norm": 0.27102354168891907, "learning_rate": 1.9907473702551194e-07, "loss": 0.466544508934021, "step": 15672 }, { "epoch": 19.230674846625767, "grad_norm": 0.243444561958313, "learning_rate": 1.9844309024245334e-07, "loss": 0.4627355933189392, "step": 15673 }, { "epoch": 19.2319018404908, "grad_norm": 0.2584282457828522, "learning_rate": 1.978124431394196e-07, "loss": 0.3388047218322754, "step": 15674 }, { "epoch": 19.233128834355828, "grad_norm": 0.29062268137931824, "learning_rate": 1.9718279574182653e-07, "loss": 0.6275640726089478, "step": 15675 }, { "epoch": 19.23435582822086, "grad_norm": 0.2674814462661743, "learning_rate": 1.9655414807505658e-07, "loss": 0.6290971040725708, "step": 15676 }, { "epoch": 19.235582822085888, "grad_norm": 0.2815602123737335, "learning_rate": 1.9592650016444503e-07, "loss": 0.4671408236026764, "step": 15677 }, { "epoch": 19.23680981595092, "grad_norm": 0.2835483253002167, "learning_rate": 1.9529985203529388e-07, "loss": 0.5751950144767761, "step": 15678 }, { "epoch": 19.238036809815952, "grad_norm": 0.28011080622673035, "learning_rate": 1.9467420371286071e-07, "loss": 0.5261114835739136, "step": 15679 }, { "epoch": 19.23926380368098, "grad_norm": 0.25334927439689636, "learning_rate": 1.9404955522236422e-07, "loss": 0.6303417086601257, "step": 15680 }, { "epoch": 19.240490797546013, "grad_norm": 0.29517796635627747, "learning_rate": 1.9342590658898153e-07, "loss": 0.593755841255188, "step": 15681 }, { "epoch": 19.241717791411045, "grad_norm": 0.2704833745956421, "learning_rate": 1.928032578378508e-07, "loss": 0.6073254942893982, "step": 15682 }, { "epoch": 19.242944785276073, "grad_norm": 0.26108258962631226, "learning_rate": 1.921816089940659e-07, "loss": 0.5615946054458618, "step": 15683 }, { "epoch": 19.244171779141105, "grad_norm": 0.2662059962749481, "learning_rate": 1.9156096008268732e-07, "loss": 0.7788010835647583, "step": 15684 }, { "epoch": 19.245398773006134, "grad_norm": 0.28087398409843445, "learning_rate": 1.9094131112872837e-07, "loss": 0.654100239276886, "step": 15685 }, { "epoch": 19.246625766871166, "grad_norm": 0.27967190742492676, "learning_rate": 1.9032266215717188e-07, "loss": 0.5717660188674927, "step": 15686 }, { "epoch": 19.247852760736198, "grad_norm": 0.2547641098499298, "learning_rate": 1.8970501319294787e-07, "loss": 0.5374426245689392, "step": 15687 }, { "epoch": 19.249079754601226, "grad_norm": 0.30670973658561707, "learning_rate": 1.8908836426095312e-07, "loss": 0.5804160833358765, "step": 15688 }, { "epoch": 19.250306748466258, "grad_norm": 0.2664736807346344, "learning_rate": 1.8847271538604272e-07, "loss": 0.6950273513793945, "step": 15689 }, { "epoch": 19.25153374233129, "grad_norm": 0.28548797965049744, "learning_rate": 1.8785806659303295e-07, "loss": 0.624729573726654, "step": 15690 }, { "epoch": 19.25276073619632, "grad_norm": 0.28483811020851135, "learning_rate": 1.8724441790669568e-07, "loss": 0.6761590242385864, "step": 15691 }, { "epoch": 19.25398773006135, "grad_norm": 0.2748323976993561, "learning_rate": 1.8663176935177218e-07, "loss": 0.7182489633560181, "step": 15692 }, { "epoch": 19.25521472392638, "grad_norm": 0.26820382475852966, "learning_rate": 1.860201209529483e-07, "loss": 0.5007621049880981, "step": 15693 }, { "epoch": 19.25644171779141, "grad_norm": 0.2610996663570404, "learning_rate": 1.8540947273488484e-07, "loss": 0.6145170331001282, "step": 15694 }, { "epoch": 19.257668711656443, "grad_norm": 0.2924644351005554, "learning_rate": 1.8479982472218994e-07, "loss": 0.7354214191436768, "step": 15695 }, { "epoch": 19.25889570552147, "grad_norm": 0.24042746424674988, "learning_rate": 1.841911769394411e-07, "loss": 0.46986573934555054, "step": 15696 }, { "epoch": 19.260122699386503, "grad_norm": 0.27207738161087036, "learning_rate": 1.8358352941116596e-07, "loss": 0.5326989889144897, "step": 15697 }, { "epoch": 19.261349693251535, "grad_norm": 0.25708624720573425, "learning_rate": 1.829768821618616e-07, "loss": 0.42855197191238403, "step": 15698 }, { "epoch": 19.262576687116564, "grad_norm": 0.23511746525764465, "learning_rate": 1.8237123521597788e-07, "loss": 0.45312345027923584, "step": 15699 }, { "epoch": 19.263803680981596, "grad_norm": 0.3303406834602356, "learning_rate": 1.817665885979314e-07, "loss": 0.6579532623291016, "step": 15700 }, { "epoch": 19.265030674846624, "grad_norm": 0.26569119095802307, "learning_rate": 1.8116294233208598e-07, "loss": 0.5076731443405151, "step": 15701 }, { "epoch": 19.266257668711656, "grad_norm": 0.2574261426925659, "learning_rate": 1.8056029644277772e-07, "loss": 0.5825338363647461, "step": 15702 }, { "epoch": 19.26748466257669, "grad_norm": 0.24608305096626282, "learning_rate": 1.7995865095429832e-07, "loss": 0.5878791809082031, "step": 15703 }, { "epoch": 19.268711656441717, "grad_norm": 0.2769867181777954, "learning_rate": 1.7935800589089502e-07, "loss": 0.39420732855796814, "step": 15704 }, { "epoch": 19.26993865030675, "grad_norm": 0.2668036222457886, "learning_rate": 1.7875836127677903e-07, "loss": 0.7374603748321533, "step": 15705 }, { "epoch": 19.27116564417178, "grad_norm": 0.3071646988391876, "learning_rate": 1.7815971713612268e-07, "loss": 0.7182461023330688, "step": 15706 }, { "epoch": 19.27239263803681, "grad_norm": 0.3189086616039276, "learning_rate": 1.7756207349305387e-07, "loss": 0.7317497134208679, "step": 15707 }, { "epoch": 19.27361963190184, "grad_norm": 0.30117762088775635, "learning_rate": 1.769654303716617e-07, "loss": 0.7006818652153015, "step": 15708 }, { "epoch": 19.27484662576687, "grad_norm": 0.2477964460849762, "learning_rate": 1.7636978779599633e-07, "loss": 0.48170962929725647, "step": 15709 }, { "epoch": 19.2760736196319, "grad_norm": 0.26466891169548035, "learning_rate": 1.757751457900636e-07, "loss": 0.5916324853897095, "step": 15710 }, { "epoch": 19.277300613496934, "grad_norm": 0.2800232172012329, "learning_rate": 1.7518150437783597e-07, "loss": 0.6216551661491394, "step": 15711 }, { "epoch": 19.278527607361962, "grad_norm": 0.2177606076002121, "learning_rate": 1.74588863583236e-07, "loss": 0.4567154347896576, "step": 15712 }, { "epoch": 19.279754601226994, "grad_norm": 0.27706941962242126, "learning_rate": 1.7399722343015846e-07, "loss": 0.7275480031967163, "step": 15713 }, { "epoch": 19.280981595092026, "grad_norm": 0.30545711517333984, "learning_rate": 1.7340658394244537e-07, "loss": 0.6275465488433838, "step": 15714 }, { "epoch": 19.282208588957054, "grad_norm": 0.24607671797275543, "learning_rate": 1.7281694514390546e-07, "loss": 0.42245131731033325, "step": 15715 }, { "epoch": 19.283435582822086, "grad_norm": 0.2522715628147125, "learning_rate": 1.7222830705830583e-07, "loss": 0.6712870597839355, "step": 15716 }, { "epoch": 19.284662576687115, "grad_norm": 0.24713724851608276, "learning_rate": 1.716406697093692e-07, "loss": 0.11985582113265991, "step": 15717 }, { "epoch": 19.285889570552147, "grad_norm": 0.23300576210021973, "learning_rate": 1.7105403312078772e-07, "loss": 0.5070394277572632, "step": 15718 }, { "epoch": 19.28711656441718, "grad_norm": 0.29088982939720154, "learning_rate": 1.7046839731620357e-07, "loss": 0.5711818933486938, "step": 15719 }, { "epoch": 19.288343558282207, "grad_norm": 0.3080708682537079, "learning_rate": 1.698837623192201e-07, "loss": 0.6905769109725952, "step": 15720 }, { "epoch": 19.28957055214724, "grad_norm": 0.2848478853702545, "learning_rate": 1.6930012815340736e-07, "loss": 0.626186728477478, "step": 15721 }, { "epoch": 19.29079754601227, "grad_norm": 0.25921517610549927, "learning_rate": 1.687174948422854e-07, "loss": 0.7007246017456055, "step": 15722 }, { "epoch": 19.2920245398773, "grad_norm": 0.3125583827495575, "learning_rate": 1.681358624093382e-07, "loss": 0.5929790735244751, "step": 15723 }, { "epoch": 19.293251533742332, "grad_norm": 0.24967384338378906, "learning_rate": 1.675552308780165e-07, "loss": 0.4132218062877655, "step": 15724 }, { "epoch": 19.29447852760736, "grad_norm": 0.2761126756668091, "learning_rate": 1.6697560027171544e-07, "loss": 0.5265856981277466, "step": 15725 }, { "epoch": 19.295705521472392, "grad_norm": 0.25898540019989014, "learning_rate": 1.6639697061380243e-07, "loss": 0.3211030960083008, "step": 15726 }, { "epoch": 19.296932515337424, "grad_norm": 0.24974408745765686, "learning_rate": 1.658193419276005e-07, "loss": 0.5403602719306946, "step": 15727 }, { "epoch": 19.298159509202453, "grad_norm": 0.2762167155742645, "learning_rate": 1.6524271423639103e-07, "loss": 0.6595354080200195, "step": 15728 }, { "epoch": 19.299386503067485, "grad_norm": 0.27492180466651917, "learning_rate": 1.6466708756341932e-07, "loss": 0.47847893834114075, "step": 15729 }, { "epoch": 19.300613496932517, "grad_norm": 0.2709231376647949, "learning_rate": 1.6409246193188065e-07, "loss": 0.6940353512763977, "step": 15730 }, { "epoch": 19.301840490797545, "grad_norm": 0.27758049964904785, "learning_rate": 1.6351883736494268e-07, "loss": 0.684998095035553, "step": 15731 }, { "epoch": 19.303067484662577, "grad_norm": 0.23531819880008698, "learning_rate": 1.6294621388572296e-07, "loss": 0.2514550983905792, "step": 15732 }, { "epoch": 19.30429447852761, "grad_norm": 0.2790698707103729, "learning_rate": 1.6237459151730583e-07, "loss": 0.413646936416626, "step": 15733 }, { "epoch": 19.305521472392638, "grad_norm": 0.3102453649044037, "learning_rate": 1.6180397028272844e-07, "loss": 0.6367359757423401, "step": 15734 }, { "epoch": 19.30674846625767, "grad_norm": 0.25980275869369507, "learning_rate": 1.6123435020499455e-07, "loss": 0.5755482912063599, "step": 15735 }, { "epoch": 19.307975460122698, "grad_norm": 0.25953999161720276, "learning_rate": 1.606657313070581e-07, "loss": 0.6423062682151794, "step": 15736 }, { "epoch": 19.30920245398773, "grad_norm": 0.2520987391471863, "learning_rate": 1.600981136118451e-07, "loss": 0.6856333017349243, "step": 15737 }, { "epoch": 19.310429447852762, "grad_norm": 0.26205897331237793, "learning_rate": 1.5953149714222904e-07, "loss": 0.42693936824798584, "step": 15738 }, { "epoch": 19.31165644171779, "grad_norm": 0.2931806743144989, "learning_rate": 1.5896588192105544e-07, "loss": 0.648865282535553, "step": 15739 }, { "epoch": 19.312883435582823, "grad_norm": 0.28576067090034485, "learning_rate": 1.5840126797111444e-07, "loss": 0.6645696759223938, "step": 15740 }, { "epoch": 19.314110429447855, "grad_norm": 0.24995481967926025, "learning_rate": 1.5783765531516837e-07, "loss": 0.6978772878646851, "step": 15741 }, { "epoch": 19.315337423312883, "grad_norm": 0.24016039073467255, "learning_rate": 1.5727504397593517e-07, "loss": 0.5503772497177124, "step": 15742 }, { "epoch": 19.316564417177915, "grad_norm": 0.2745150327682495, "learning_rate": 1.5671343397609117e-07, "loss": 0.8297175168991089, "step": 15743 }, { "epoch": 19.317791411042943, "grad_norm": 0.2715660631656647, "learning_rate": 1.5615282533827381e-07, "loss": 0.6635775566101074, "step": 15744 }, { "epoch": 19.319018404907975, "grad_norm": 0.2674633860588074, "learning_rate": 1.5559321808507888e-07, "loss": 0.5984900593757629, "step": 15745 }, { "epoch": 19.320245398773007, "grad_norm": 0.23603837192058563, "learning_rate": 1.5503461223906058e-07, "loss": 0.4508005678653717, "step": 15746 }, { "epoch": 19.321472392638036, "grad_norm": 0.26100143790245056, "learning_rate": 1.5447700782273976e-07, "loss": 0.5953958034515381, "step": 15747 }, { "epoch": 19.322699386503068, "grad_norm": 0.20635277032852173, "learning_rate": 1.5392040485858738e-07, "loss": 0.4517844617366791, "step": 15748 }, { "epoch": 19.3239263803681, "grad_norm": 0.3128073215484619, "learning_rate": 1.5336480336904103e-07, "loss": 0.6955556869506836, "step": 15749 }, { "epoch": 19.32515337423313, "grad_norm": 0.26238998770713806, "learning_rate": 1.5281020337649664e-07, "loss": 0.576416015625, "step": 15750 }, { "epoch": 19.32638036809816, "grad_norm": 0.264708548784256, "learning_rate": 1.5225660490330307e-07, "loss": 0.536928653717041, "step": 15751 }, { "epoch": 19.32760736196319, "grad_norm": 0.26538512110710144, "learning_rate": 1.5170400797177853e-07, "loss": 0.5383561849594116, "step": 15752 }, { "epoch": 19.32883435582822, "grad_norm": 0.3585108518600464, "learning_rate": 1.5115241260419687e-07, "loss": 0.4883931875228882, "step": 15753 }, { "epoch": 19.330061349693253, "grad_norm": 0.25332963466644287, "learning_rate": 1.5060181882279035e-07, "loss": 0.6847756505012512, "step": 15754 }, { "epoch": 19.33128834355828, "grad_norm": 0.2801564335823059, "learning_rate": 1.5005222664975228e-07, "loss": 0.6801108121871948, "step": 15755 }, { "epoch": 19.332515337423313, "grad_norm": 0.2803504765033722, "learning_rate": 1.495036361072344e-07, "loss": 0.6735873222351074, "step": 15756 }, { "epoch": 19.333742331288345, "grad_norm": 0.2586252689361572, "learning_rate": 1.489560472173468e-07, "loss": 0.4560503661632538, "step": 15757 }, { "epoch": 19.334969325153374, "grad_norm": 0.2740887403488159, "learning_rate": 1.4840946000216628e-07, "loss": 0.4215601682662964, "step": 15758 }, { "epoch": 19.336196319018406, "grad_norm": 0.32748526334762573, "learning_rate": 1.4786387448371963e-07, "loss": 0.6028684377670288, "step": 15759 }, { "epoch": 19.337423312883434, "grad_norm": 0.2679018974304199, "learning_rate": 1.473192906840004e-07, "loss": 0.5579320192337036, "step": 15760 }, { "epoch": 19.338650306748466, "grad_norm": 0.25507208704948425, "learning_rate": 1.4677570862496049e-07, "loss": 0.6786926984786987, "step": 15761 }, { "epoch": 19.339877300613498, "grad_norm": 0.299342542886734, "learning_rate": 1.4623312832850455e-07, "loss": 0.6689451932907104, "step": 15762 }, { "epoch": 19.341104294478527, "grad_norm": 0.25550568103790283, "learning_rate": 1.4569154981650957e-07, "loss": 0.5636083483695984, "step": 15763 }, { "epoch": 19.34233128834356, "grad_norm": 0.21735846996307373, "learning_rate": 1.4515097311079973e-07, "loss": 0.38089361786842346, "step": 15764 }, { "epoch": 19.34355828220859, "grad_norm": 0.29939004778862, "learning_rate": 1.4461139823316595e-07, "loss": 0.6110247373580933, "step": 15765 }, { "epoch": 19.34478527607362, "grad_norm": 0.2742545008659363, "learning_rate": 1.4407282520535747e-07, "loss": 0.508581817150116, "step": 15766 }, { "epoch": 19.34601226993865, "grad_norm": 0.26144132018089294, "learning_rate": 1.4353525404908198e-07, "loss": 0.6471553444862366, "step": 15767 }, { "epoch": 19.34723926380368, "grad_norm": 0.25421637296676636, "learning_rate": 1.429986847860082e-07, "loss": 0.5721098184585571, "step": 15768 }, { "epoch": 19.34846625766871, "grad_norm": 0.25687286257743835, "learning_rate": 1.4246311743776052e-07, "loss": 0.725675106048584, "step": 15769 }, { "epoch": 19.349693251533743, "grad_norm": 0.2701999843120575, "learning_rate": 1.4192855202593002e-07, "loss": 0.582121729850769, "step": 15770 }, { "epoch": 19.350920245398772, "grad_norm": 0.2289995700120926, "learning_rate": 1.4139498857206335e-07, "loss": 0.5169103145599365, "step": 15771 }, { "epoch": 19.352147239263804, "grad_norm": 0.2467261403799057, "learning_rate": 1.4086242709766273e-07, "loss": 0.6166015863418579, "step": 15772 }, { "epoch": 19.353374233128836, "grad_norm": 0.26953253149986267, "learning_rate": 1.403308676241999e-07, "loss": 0.6285051107406616, "step": 15773 }, { "epoch": 19.354601226993864, "grad_norm": 0.26015618443489075, "learning_rate": 1.398003101730966e-07, "loss": 0.43505793809890747, "step": 15774 }, { "epoch": 19.355828220858896, "grad_norm": 0.2975304424762726, "learning_rate": 1.3927075476574125e-07, "loss": 0.7155896425247192, "step": 15775 }, { "epoch": 19.357055214723925, "grad_norm": 0.26385679841041565, "learning_rate": 1.3874220142347515e-07, "loss": 0.5783834457397461, "step": 15776 }, { "epoch": 19.358282208588957, "grad_norm": 0.31113341450691223, "learning_rate": 1.382146501676035e-07, "loss": 0.5601823925971985, "step": 15777 }, { "epoch": 19.35950920245399, "grad_norm": 0.27217990159988403, "learning_rate": 1.3768810101939256e-07, "loss": 0.7729296684265137, "step": 15778 }, { "epoch": 19.360736196319017, "grad_norm": 0.28225427865982056, "learning_rate": 1.371625540000643e-07, "loss": 0.75234055519104, "step": 15779 }, { "epoch": 19.36196319018405, "grad_norm": 0.23642419278621674, "learning_rate": 1.366380091308017e-07, "loss": 0.5655539631843567, "step": 15780 }, { "epoch": 19.36319018404908, "grad_norm": 0.2754266560077667, "learning_rate": 1.3611446643274905e-07, "loss": 0.6342481970787048, "step": 15781 }, { "epoch": 19.36441717791411, "grad_norm": 0.2841984033584595, "learning_rate": 1.3559192592700888e-07, "loss": 0.5084980130195618, "step": 15782 }, { "epoch": 19.36564417177914, "grad_norm": 0.27847516536712646, "learning_rate": 1.350703876346421e-07, "loss": 0.6208343505859375, "step": 15783 }, { "epoch": 19.36687116564417, "grad_norm": 0.2644451856613159, "learning_rate": 1.3454985157667076e-07, "loss": 0.6807302236557007, "step": 15784 }, { "epoch": 19.368098159509202, "grad_norm": 0.25107067823410034, "learning_rate": 1.3403031777407816e-07, "loss": 0.5898149013519287, "step": 15785 }, { "epoch": 19.369325153374234, "grad_norm": 0.2669815421104431, "learning_rate": 1.3351178624780304e-07, "loss": 0.4053003191947937, "step": 15786 }, { "epoch": 19.370552147239263, "grad_norm": 0.2503422200679779, "learning_rate": 1.3299425701874535e-07, "loss": 0.572515606880188, "step": 15787 }, { "epoch": 19.371779141104295, "grad_norm": 0.2358214110136032, "learning_rate": 1.324777301077662e-07, "loss": 0.5711122751235962, "step": 15788 }, { "epoch": 19.373006134969327, "grad_norm": 0.2685088813304901, "learning_rate": 1.3196220553568505e-07, "loss": 0.5711294412612915, "step": 15789 }, { "epoch": 19.374233128834355, "grad_norm": 0.29895490407943726, "learning_rate": 1.314476833232825e-07, "loss": 0.6073800921440125, "step": 15790 }, { "epoch": 19.375460122699387, "grad_norm": 0.23435109853744507, "learning_rate": 1.3093416349129474e-07, "loss": 0.4982273578643799, "step": 15791 }, { "epoch": 19.376687116564415, "grad_norm": 0.2646413743495941, "learning_rate": 1.3042164606042462e-07, "loss": 0.6578046083450317, "step": 15792 }, { "epoch": 19.377914110429447, "grad_norm": 0.2803557813167572, "learning_rate": 1.2991013105132787e-07, "loss": 0.6516211032867432, "step": 15793 }, { "epoch": 19.37914110429448, "grad_norm": 0.23532381653785706, "learning_rate": 1.2939961848461857e-07, "loss": 0.5472257137298584, "step": 15794 }, { "epoch": 19.380368098159508, "grad_norm": 0.2670692205429077, "learning_rate": 1.2889010838088023e-07, "loss": 0.7223361730575562, "step": 15795 }, { "epoch": 19.38159509202454, "grad_norm": 0.2556460201740265, "learning_rate": 1.2838160076064644e-07, "loss": 0.7346701622009277, "step": 15796 }, { "epoch": 19.382822085889572, "grad_norm": 0.25721481442451477, "learning_rate": 1.2787409564441467e-07, "loss": 0.6430968046188354, "step": 15797 }, { "epoch": 19.3840490797546, "grad_norm": 0.21590790152549744, "learning_rate": 1.27367593052638e-07, "loss": 0.554527223110199, "step": 15798 }, { "epoch": 19.385276073619632, "grad_norm": 0.2908580005168915, "learning_rate": 1.2686209300573903e-07, "loss": 0.6397587060928345, "step": 15799 }, { "epoch": 19.38650306748466, "grad_norm": 0.2682172656059265, "learning_rate": 1.2635759552408476e-07, "loss": 0.6351278424263, "step": 15800 }, { "epoch": 19.387730061349693, "grad_norm": 0.23552776873111725, "learning_rate": 1.258541006280145e-07, "loss": 0.5623694658279419, "step": 15801 }, { "epoch": 19.388957055214725, "grad_norm": 0.2573508322238922, "learning_rate": 1.253516083378231e-07, "loss": 0.6185847520828247, "step": 15802 }, { "epoch": 19.390184049079753, "grad_norm": 0.22070027887821198, "learning_rate": 1.2485011867376385e-07, "loss": 0.49574780464172363, "step": 15803 }, { "epoch": 19.391411042944785, "grad_norm": 0.25273093581199646, "learning_rate": 1.2434963165605106e-07, "loss": 0.49352046847343445, "step": 15804 }, { "epoch": 19.392638036809817, "grad_norm": 0.2598955035209656, "learning_rate": 1.2385014730485477e-07, "loss": 0.381794810295105, "step": 15805 }, { "epoch": 19.393865030674846, "grad_norm": 0.2368859201669693, "learning_rate": 1.2335166564031165e-07, "loss": 0.5401523113250732, "step": 15806 }, { "epoch": 19.395092024539878, "grad_norm": 0.22323626279830933, "learning_rate": 1.228541866825139e-07, "loss": 0.48832154273986816, "step": 15807 }, { "epoch": 19.39631901840491, "grad_norm": 0.25122928619384766, "learning_rate": 1.2235771045150945e-07, "loss": 0.5396353006362915, "step": 15808 }, { "epoch": 19.397546012269938, "grad_norm": 0.2825191915035248, "learning_rate": 1.218622369673128e-07, "loss": 0.6739176511764526, "step": 15809 }, { "epoch": 19.39877300613497, "grad_norm": 0.252305805683136, "learning_rate": 1.213677662498941e-07, "loss": 0.5056841969490051, "step": 15810 }, { "epoch": 19.4, "grad_norm": 0.2915877401828766, "learning_rate": 1.208742983191874e-07, "loss": 0.6499778032302856, "step": 15811 }, { "epoch": 19.40122699386503, "grad_norm": 0.283810555934906, "learning_rate": 1.2038183319507955e-07, "loss": 0.499967098236084, "step": 15812 }, { "epoch": 19.402453987730063, "grad_norm": 0.2559482157230377, "learning_rate": 1.198903708974186e-07, "loss": 0.41814619302749634, "step": 15813 }, { "epoch": 19.40368098159509, "grad_norm": 0.24786344170570374, "learning_rate": 1.1939991144601925e-07, "loss": 0.4998861849308014, "step": 15814 }, { "epoch": 19.404907975460123, "grad_norm": 0.23829296231269836, "learning_rate": 1.1891045486064623e-07, "loss": 0.4071350395679474, "step": 15815 }, { "epoch": 19.406134969325155, "grad_norm": 0.28033989667892456, "learning_rate": 1.1842200116103097e-07, "loss": 0.5290799140930176, "step": 15816 }, { "epoch": 19.407361963190183, "grad_norm": 0.25909551978111267, "learning_rate": 1.1793455036685774e-07, "loss": 0.6265980005264282, "step": 15817 }, { "epoch": 19.408588957055215, "grad_norm": 0.24783332645893097, "learning_rate": 1.1744810249777749e-07, "loss": 0.595130205154419, "step": 15818 }, { "epoch": 19.409815950920244, "grad_norm": 0.2714422941207886, "learning_rate": 1.1696265757339952e-07, "loss": 0.5665304064750671, "step": 15819 }, { "epoch": 19.411042944785276, "grad_norm": 0.2818240523338318, "learning_rate": 1.1647821561328598e-07, "loss": 0.6436522603034973, "step": 15820 }, { "epoch": 19.412269938650308, "grad_norm": 0.2629481852054596, "learning_rate": 1.1599477663696845e-07, "loss": 0.6823731660842896, "step": 15821 }, { "epoch": 19.413496932515336, "grad_norm": 0.2467951774597168, "learning_rate": 1.155123406639258e-07, "loss": 0.6010808944702148, "step": 15822 }, { "epoch": 19.41472392638037, "grad_norm": 0.2746279239654541, "learning_rate": 1.1503090771361192e-07, "loss": 0.6330393552780151, "step": 15823 }, { "epoch": 19.4159509202454, "grad_norm": 0.2665802836418152, "learning_rate": 1.1455047780542239e-07, "loss": 0.49999570846557617, "step": 15824 }, { "epoch": 19.41717791411043, "grad_norm": 0.24728545546531677, "learning_rate": 1.1407105095873338e-07, "loss": 0.6300836801528931, "step": 15825 }, { "epoch": 19.41840490797546, "grad_norm": 0.24740903079509735, "learning_rate": 1.1359262719286002e-07, "loss": 0.48572689294815063, "step": 15826 }, { "epoch": 19.41963190184049, "grad_norm": 0.2547744810581207, "learning_rate": 1.131152065270924e-07, "loss": 0.7048099040985107, "step": 15827 }, { "epoch": 19.42085889570552, "grad_norm": 0.2635292410850525, "learning_rate": 1.1263878898066792e-07, "loss": 0.7074223756790161, "step": 15828 }, { "epoch": 19.422085889570553, "grad_norm": 0.24470630288124084, "learning_rate": 1.1216337457279624e-07, "loss": 0.5487247705459595, "step": 15829 }, { "epoch": 19.42331288343558, "grad_norm": 0.2753901481628418, "learning_rate": 1.1168896332263423e-07, "loss": 0.598219633102417, "step": 15830 }, { "epoch": 19.424539877300614, "grad_norm": 0.2721850574016571, "learning_rate": 1.1121555524930826e-07, "loss": 0.6115570664405823, "step": 15831 }, { "epoch": 19.425766871165646, "grad_norm": 0.25848832726478577, "learning_rate": 1.1074315037189753e-07, "loss": 0.7405918836593628, "step": 15832 }, { "epoch": 19.426993865030674, "grad_norm": 0.27631810307502747, "learning_rate": 1.1027174870944234e-07, "loss": 0.6088199615478516, "step": 15833 }, { "epoch": 19.428220858895706, "grad_norm": 0.2754313349723816, "learning_rate": 1.0980135028094696e-07, "loss": 0.5649731159210205, "step": 15834 }, { "epoch": 19.429447852760735, "grad_norm": 0.2925424873828888, "learning_rate": 1.093319551053712e-07, "loss": 0.7299742698669434, "step": 15835 }, { "epoch": 19.430674846625767, "grad_norm": 0.2503783702850342, "learning_rate": 1.0886356320163049e-07, "loss": 0.27628859877586365, "step": 15836 }, { "epoch": 19.4319018404908, "grad_norm": 0.2410682737827301, "learning_rate": 1.0839617458860973e-07, "loss": 0.5626966953277588, "step": 15837 }, { "epoch": 19.433128834355827, "grad_norm": 0.23362362384796143, "learning_rate": 1.0792978928514663e-07, "loss": 0.4103464186191559, "step": 15838 }, { "epoch": 19.43435582822086, "grad_norm": 0.2630615234375, "learning_rate": 1.0746440731003727e-07, "loss": 0.6557605266571045, "step": 15839 }, { "epoch": 19.43558282208589, "grad_norm": 0.24873913824558258, "learning_rate": 1.0700002868204439e-07, "loss": 0.7347830533981323, "step": 15840 }, { "epoch": 19.43680981595092, "grad_norm": 0.2748818099498749, "learning_rate": 1.0653665341988361e-07, "loss": 0.6404551863670349, "step": 15841 }, { "epoch": 19.43803680981595, "grad_norm": 0.22964942455291748, "learning_rate": 1.0607428154222887e-07, "loss": 0.6122875213623047, "step": 15842 }, { "epoch": 19.43926380368098, "grad_norm": 0.2754347026348114, "learning_rate": 1.0561291306772359e-07, "loss": 0.71027010679245, "step": 15843 }, { "epoch": 19.440490797546012, "grad_norm": 0.245911106467247, "learning_rate": 1.0515254801495845e-07, "loss": 0.632553219795227, "step": 15844 }, { "epoch": 19.441717791411044, "grad_norm": 0.2571648061275482, "learning_rate": 1.0469318640249359e-07, "loss": 0.5757336616516113, "step": 15845 }, { "epoch": 19.442944785276072, "grad_norm": 0.26781097054481506, "learning_rate": 1.0423482824883924e-07, "loss": 0.7274123430252075, "step": 15846 }, { "epoch": 19.444171779141104, "grad_norm": 0.2652866244316101, "learning_rate": 1.037774735724778e-07, "loss": 0.5961483716964722, "step": 15847 }, { "epoch": 19.445398773006136, "grad_norm": 0.279137521982193, "learning_rate": 1.03321122391839e-07, "loss": 0.7205758690834045, "step": 15848 }, { "epoch": 19.446625766871165, "grad_norm": 0.35979318618774414, "learning_rate": 1.0286577472531645e-07, "loss": 0.46861517429351807, "step": 15849 }, { "epoch": 19.447852760736197, "grad_norm": 0.2840240001678467, "learning_rate": 1.0241143059126767e-07, "loss": 0.6414732933044434, "step": 15850 }, { "epoch": 19.449079754601225, "grad_norm": 0.2638004720211029, "learning_rate": 1.0195809000800305e-07, "loss": 0.5935536026954651, "step": 15851 }, { "epoch": 19.450306748466257, "grad_norm": 0.24804934859275818, "learning_rate": 1.0150575299379683e-07, "loss": 0.7691047191619873, "step": 15852 }, { "epoch": 19.45153374233129, "grad_norm": 0.2349923551082611, "learning_rate": 1.0105441956688167e-07, "loss": 0.5768539905548096, "step": 15853 }, { "epoch": 19.452760736196318, "grad_norm": 0.27598461508750916, "learning_rate": 1.0060408974544855e-07, "loss": 0.5908697843551636, "step": 15854 }, { "epoch": 19.45398773006135, "grad_norm": 0.29686421155929565, "learning_rate": 1.0015476354764963e-07, "loss": 0.5275049805641174, "step": 15855 }, { "epoch": 19.45521472392638, "grad_norm": 0.2517312467098236, "learning_rate": 9.970644099159266e-08, "loss": 0.3856890797615051, "step": 15856 }, { "epoch": 19.45644171779141, "grad_norm": 0.2788155972957611, "learning_rate": 9.925912209535482e-08, "loss": 0.4941345751285553, "step": 15857 }, { "epoch": 19.457668711656442, "grad_norm": 0.25111356377601624, "learning_rate": 9.881280687696059e-08, "loss": 0.6521642208099365, "step": 15858 }, { "epoch": 19.45889570552147, "grad_norm": 0.28807491064071655, "learning_rate": 9.836749535440115e-08, "loss": 0.6666617393493652, "step": 15859 }, { "epoch": 19.460122699386503, "grad_norm": 0.2396821826696396, "learning_rate": 9.792318754562879e-08, "loss": 0.6506588459014893, "step": 15860 }, { "epoch": 19.461349693251535, "grad_norm": 0.28365930914878845, "learning_rate": 9.747988346854863e-08, "loss": 0.7387405037879944, "step": 15861 }, { "epoch": 19.462576687116563, "grad_norm": 0.24973832070827484, "learning_rate": 9.703758314102974e-08, "loss": 0.4761587977409363, "step": 15862 }, { "epoch": 19.463803680981595, "grad_norm": 0.27384695410728455, "learning_rate": 9.659628658089948e-08, "loss": 0.7770678997039795, "step": 15863 }, { "epoch": 19.465030674846627, "grad_norm": 0.25532543659210205, "learning_rate": 9.61559938059492e-08, "loss": 0.5538173913955688, "step": 15864 }, { "epoch": 19.466257668711656, "grad_norm": 0.2814151644706726, "learning_rate": 9.571670483392026e-08, "loss": 0.46482327580451965, "step": 15865 }, { "epoch": 19.467484662576688, "grad_norm": 0.2586729824542999, "learning_rate": 9.527841968252349e-08, "loss": 0.5719910860061646, "step": 15866 }, { "epoch": 19.46871165644172, "grad_norm": 0.24742953479290009, "learning_rate": 9.484113836941977e-08, "loss": 0.5429021120071411, "step": 15867 }, { "epoch": 19.469938650306748, "grad_norm": 0.29058292508125305, "learning_rate": 9.440486091223944e-08, "loss": 0.6003931760787964, "step": 15868 }, { "epoch": 19.47116564417178, "grad_norm": 0.23325979709625244, "learning_rate": 9.396958732856842e-08, "loss": 0.3479607105255127, "step": 15869 }, { "epoch": 19.47239263803681, "grad_norm": 0.25594303011894226, "learning_rate": 9.353531763594826e-08, "loss": 0.6116254925727844, "step": 15870 }, { "epoch": 19.47361963190184, "grad_norm": 0.306173175573349, "learning_rate": 9.310205185188436e-08, "loss": 0.6195787191390991, "step": 15871 }, { "epoch": 19.474846625766872, "grad_norm": 0.30268770456314087, "learning_rate": 9.26697899938378e-08, "loss": 0.7054605484008789, "step": 15872 }, { "epoch": 19.4760736196319, "grad_norm": 0.2644689977169037, "learning_rate": 9.223853207923627e-08, "loss": 0.5313975214958191, "step": 15873 }, { "epoch": 19.477300613496933, "grad_norm": 0.2595892548561096, "learning_rate": 9.180827812546034e-08, "loss": 0.5963461399078369, "step": 15874 }, { "epoch": 19.478527607361965, "grad_norm": 0.25042617321014404, "learning_rate": 9.137902814984889e-08, "loss": 0.48135995864868164, "step": 15875 }, { "epoch": 19.479754601226993, "grad_norm": 0.24949641525745392, "learning_rate": 9.095078216971031e-08, "loss": 0.6230705380439758, "step": 15876 }, { "epoch": 19.480981595092025, "grad_norm": 0.2671191096305847, "learning_rate": 9.052354020230303e-08, "loss": 0.4922260046005249, "step": 15877 }, { "epoch": 19.482208588957054, "grad_norm": 0.25180691480636597, "learning_rate": 9.009730226484658e-08, "loss": 0.7323871850967407, "step": 15878 }, { "epoch": 19.483435582822086, "grad_norm": 0.23532934486865997, "learning_rate": 8.967206837452446e-08, "loss": 0.49909257888793945, "step": 15879 }, { "epoch": 19.484662576687118, "grad_norm": 0.2709427773952484, "learning_rate": 8.924783854847296e-08, "loss": 0.6748344898223877, "step": 15880 }, { "epoch": 19.485889570552146, "grad_norm": 0.27144742012023926, "learning_rate": 8.882461280379506e-08, "loss": 0.5123757123947144, "step": 15881 }, { "epoch": 19.487116564417178, "grad_norm": 0.2806928753852844, "learning_rate": 8.840239115754656e-08, "loss": 0.5968196392059326, "step": 15882 }, { "epoch": 19.48834355828221, "grad_norm": 0.2839152216911316, "learning_rate": 8.798117362674718e-08, "loss": 0.6130701899528503, "step": 15883 }, { "epoch": 19.48957055214724, "grad_norm": 0.2815488278865814, "learning_rate": 8.756096022837779e-08, "loss": 0.5562403202056885, "step": 15884 }, { "epoch": 19.49079754601227, "grad_norm": 0.281522274017334, "learning_rate": 8.714175097937204e-08, "loss": 0.5834305286407471, "step": 15885 }, { "epoch": 19.4920245398773, "grad_norm": 0.2675686478614807, "learning_rate": 8.672354589662757e-08, "loss": 0.535876989364624, "step": 15886 }, { "epoch": 19.49325153374233, "grad_norm": 0.2832101881504059, "learning_rate": 8.630634499700308e-08, "loss": 0.5851317048072815, "step": 15887 }, { "epoch": 19.494478527607363, "grad_norm": 0.2712644040584564, "learning_rate": 8.58901482973129e-08, "loss": 0.653908371925354, "step": 15888 }, { "epoch": 19.49570552147239, "grad_norm": 0.27528050541877747, "learning_rate": 8.547495581433529e-08, "loss": 0.5186161994934082, "step": 15889 }, { "epoch": 19.496932515337424, "grad_norm": 0.30082106590270996, "learning_rate": 8.50607675648013e-08, "loss": 0.6535308361053467, "step": 15890 }, { "epoch": 19.498159509202456, "grad_norm": 0.2811293601989746, "learning_rate": 8.464758356541147e-08, "loss": 0.6708139181137085, "step": 15891 }, { "epoch": 19.499386503067484, "grad_norm": 0.2910325527191162, "learning_rate": 8.423540383281359e-08, "loss": 0.6598278284072876, "step": 15892 }, { "epoch": 19.500613496932516, "grad_norm": 0.2488136738538742, "learning_rate": 8.382422838362768e-08, "loss": 0.6660745143890381, "step": 15893 }, { "epoch": 19.501840490797544, "grad_norm": 0.29299691319465637, "learning_rate": 8.341405723442108e-08, "loss": 0.3314124345779419, "step": 15894 }, { "epoch": 19.503067484662576, "grad_norm": 0.24812175333499908, "learning_rate": 8.300489040173054e-08, "loss": 0.4809376001358032, "step": 15895 }, { "epoch": 19.50429447852761, "grad_norm": 0.2445564717054367, "learning_rate": 8.259672790204842e-08, "loss": 0.44464632868766785, "step": 15896 }, { "epoch": 19.505521472392637, "grad_norm": 0.27744320034980774, "learning_rate": 8.218956975182268e-08, "loss": 0.6921316385269165, "step": 15897 }, { "epoch": 19.50674846625767, "grad_norm": 0.298613965511322, "learning_rate": 8.178341596747074e-08, "loss": 0.7783851623535156, "step": 15898 }, { "epoch": 19.5079754601227, "grad_norm": 0.2789246141910553, "learning_rate": 8.137826656536007e-08, "loss": 0.6806527972221375, "step": 15899 }, { "epoch": 19.50920245398773, "grad_norm": 0.2696753442287445, "learning_rate": 8.097412156181927e-08, "loss": 0.7563017010688782, "step": 15900 }, { "epoch": 19.51042944785276, "grad_norm": 0.24737033247947693, "learning_rate": 8.057098097314086e-08, "loss": 0.5320447087287903, "step": 15901 }, { "epoch": 19.51165644171779, "grad_norm": 0.2320805937051773, "learning_rate": 8.016884481557574e-08, "loss": 0.40771248936653137, "step": 15902 }, { "epoch": 19.512883435582822, "grad_norm": 0.26835694909095764, "learning_rate": 7.976771310533038e-08, "loss": 0.6366398930549622, "step": 15903 }, { "epoch": 19.514110429447854, "grad_norm": 0.25947368144989014, "learning_rate": 7.93675858585724e-08, "loss": 0.6098448038101196, "step": 15904 }, { "epoch": 19.515337423312882, "grad_norm": 0.2561061382293701, "learning_rate": 7.896846309143058e-08, "loss": 0.4435282349586487, "step": 15905 }, { "epoch": 19.516564417177914, "grad_norm": 0.25841858983039856, "learning_rate": 7.857034481999481e-08, "loss": 0.4411451816558838, "step": 15906 }, { "epoch": 19.517791411042946, "grad_norm": 0.310369610786438, "learning_rate": 7.81732310603106e-08, "loss": 0.5852314233779907, "step": 15907 }, { "epoch": 19.519018404907975, "grad_norm": 0.24505546689033508, "learning_rate": 7.777712182838459e-08, "loss": 0.39524149894714355, "step": 15908 }, { "epoch": 19.520245398773007, "grad_norm": 0.28480860590934753, "learning_rate": 7.738201714017901e-08, "loss": 0.6589886546134949, "step": 15909 }, { "epoch": 19.521472392638035, "grad_norm": 0.278029203414917, "learning_rate": 7.698791701162556e-08, "loss": 0.7642176151275635, "step": 15910 }, { "epoch": 19.522699386503067, "grad_norm": 0.23402190208435059, "learning_rate": 7.659482145860597e-08, "loss": 0.5158405900001526, "step": 15911 }, { "epoch": 19.5239263803681, "grad_norm": 0.24634166061878204, "learning_rate": 7.620273049696313e-08, "loss": 0.3461175858974457, "step": 15912 }, { "epoch": 19.525153374233128, "grad_norm": 0.24783329665660858, "learning_rate": 7.581164414250663e-08, "loss": 0.6311928629875183, "step": 15913 }, { "epoch": 19.52638036809816, "grad_norm": 0.30311301350593567, "learning_rate": 7.542156241099607e-08, "loss": 0.6981312036514282, "step": 15914 }, { "epoch": 19.52760736196319, "grad_norm": 0.2723868489265442, "learning_rate": 7.503248531815499e-08, "loss": 0.3305470049381256, "step": 15915 }, { "epoch": 19.52883435582822, "grad_norm": 0.2765800654888153, "learning_rate": 7.464441287966528e-08, "loss": 0.382474422454834, "step": 15916 }, { "epoch": 19.530061349693252, "grad_norm": 0.2898448407649994, "learning_rate": 7.425734511117e-08, "loss": 0.5361390113830566, "step": 15917 }, { "epoch": 19.53128834355828, "grad_norm": 0.31788763403892517, "learning_rate": 7.387128202827054e-08, "loss": 0.8060480356216431, "step": 15918 }, { "epoch": 19.532515337423312, "grad_norm": 0.27382099628448486, "learning_rate": 7.348622364652946e-08, "loss": 0.7617279291152954, "step": 15919 }, { "epoch": 19.533742331288344, "grad_norm": 0.2756589353084564, "learning_rate": 7.31021699814649e-08, "loss": 0.6167570948600769, "step": 15920 }, { "epoch": 19.534969325153373, "grad_norm": 0.24512624740600586, "learning_rate": 7.271912104855894e-08, "loss": 0.5281206369400024, "step": 15921 }, { "epoch": 19.536196319018405, "grad_norm": 0.26523831486701965, "learning_rate": 7.233707686325198e-08, "loss": 0.584851861000061, "step": 15922 }, { "epoch": 19.537423312883437, "grad_norm": 0.2627396285533905, "learning_rate": 7.195603744093727e-08, "loss": 0.7432947158813477, "step": 15923 }, { "epoch": 19.538650306748465, "grad_norm": 0.25628870725631714, "learning_rate": 7.157600279698029e-08, "loss": 0.6964184641838074, "step": 15924 }, { "epoch": 19.539877300613497, "grad_norm": 0.29257985949516296, "learning_rate": 7.119697294669658e-08, "loss": 0.5086300373077393, "step": 15925 }, { "epoch": 19.54110429447853, "grad_norm": 0.2618100345134735, "learning_rate": 7.081894790536281e-08, "loss": 0.6850935816764832, "step": 15926 }, { "epoch": 19.542331288343558, "grad_norm": 0.2963346838951111, "learning_rate": 7.044192768821955e-08, "loss": 0.7493679523468018, "step": 15927 }, { "epoch": 19.54355828220859, "grad_norm": 0.25884315371513367, "learning_rate": 7.006591231045745e-08, "loss": 0.5280992984771729, "step": 15928 }, { "epoch": 19.54478527607362, "grad_norm": 0.25434258580207825, "learning_rate": 6.969090178723659e-08, "loss": 0.6953238248825073, "step": 15929 }, { "epoch": 19.54601226993865, "grad_norm": 0.2585464119911194, "learning_rate": 6.931689613367542e-08, "loss": 0.7088115811347961, "step": 15930 }, { "epoch": 19.547239263803682, "grad_norm": 0.26759961247444153, "learning_rate": 6.894389536484248e-08, "loss": 0.7121865749359131, "step": 15931 }, { "epoch": 19.54846625766871, "grad_norm": 0.28715232014656067, "learning_rate": 6.85718994957757e-08, "loss": 0.7158739566802979, "step": 15932 }, { "epoch": 19.549693251533743, "grad_norm": 0.24754102528095245, "learning_rate": 6.820090854146866e-08, "loss": 0.4226120114326477, "step": 15933 }, { "epoch": 19.550920245398775, "grad_norm": 0.24757280945777893, "learning_rate": 6.783092251687606e-08, "loss": 0.3920643925666809, "step": 15934 }, { "epoch": 19.552147239263803, "grad_norm": 0.2581540048122406, "learning_rate": 6.746194143691099e-08, "loss": 0.6029343605041504, "step": 15935 }, { "epoch": 19.553374233128835, "grad_norm": 0.25252625346183777, "learning_rate": 6.709396531644485e-08, "loss": 0.48421990871429443, "step": 15936 }, { "epoch": 19.554601226993864, "grad_norm": 0.25600069761276245, "learning_rate": 6.672699417031026e-08, "loss": 0.34504538774490356, "step": 15937 }, { "epoch": 19.555828220858896, "grad_norm": 0.2522304654121399, "learning_rate": 6.636102801329813e-08, "loss": 0.4672437310218811, "step": 15938 }, { "epoch": 19.557055214723928, "grad_norm": 0.2651875913143158, "learning_rate": 6.599606686015781e-08, "loss": 0.4058421850204468, "step": 15939 }, { "epoch": 19.558282208588956, "grad_norm": 0.2799481749534607, "learning_rate": 6.563211072560527e-08, "loss": 0.7666460871696472, "step": 15940 }, { "epoch": 19.559509202453988, "grad_norm": 0.22108659148216248, "learning_rate": 6.526915962430658e-08, "loss": 0.3916415572166443, "step": 15941 }, { "epoch": 19.56073619631902, "grad_norm": 0.2747965157032013, "learning_rate": 6.490721357089169e-08, "loss": 0.6071722507476807, "step": 15942 }, { "epoch": 19.56196319018405, "grad_norm": 0.25979363918304443, "learning_rate": 6.454627257994894e-08, "loss": 0.6049418449401855, "step": 15943 }, { "epoch": 19.56319018404908, "grad_norm": 0.26901867985725403, "learning_rate": 6.418633666602781e-08, "loss": 0.7086642384529114, "step": 15944 }, { "epoch": 19.56441717791411, "grad_norm": 0.25389572978019714, "learning_rate": 6.382740584363889e-08, "loss": 0.6163915395736694, "step": 15945 }, { "epoch": 19.56564417177914, "grad_norm": 0.2503718137741089, "learning_rate": 6.346948012724562e-08, "loss": 0.4150804281234741, "step": 15946 }, { "epoch": 19.566871165644173, "grad_norm": 0.25518688559532166, "learning_rate": 6.311255953127538e-08, "loss": 0.5330639481544495, "step": 15947 }, { "epoch": 19.5680981595092, "grad_norm": 0.299319326877594, "learning_rate": 6.275664407011661e-08, "loss": 0.7041675448417664, "step": 15948 }, { "epoch": 19.569325153374233, "grad_norm": 0.30883556604385376, "learning_rate": 6.240173375811343e-08, "loss": 0.7282319068908691, "step": 15949 }, { "epoch": 19.570552147239265, "grad_norm": 0.25967684388160706, "learning_rate": 6.204782860957381e-08, "loss": 0.6163053512573242, "step": 15950 }, { "epoch": 19.571779141104294, "grad_norm": 0.27229344844818115, "learning_rate": 6.169492863875858e-08, "loss": 0.5974706411361694, "step": 15951 }, { "epoch": 19.573006134969326, "grad_norm": 0.27585119009017944, "learning_rate": 6.134303385989804e-08, "loss": 0.5248773097991943, "step": 15952 }, { "epoch": 19.574233128834354, "grad_norm": 0.26701897382736206, "learning_rate": 6.09921442871697e-08, "loss": 0.5168187618255615, "step": 15953 }, { "epoch": 19.575460122699386, "grad_norm": 0.27038970589637756, "learning_rate": 6.064225993472061e-08, "loss": 0.6226307153701782, "step": 15954 }, { "epoch": 19.57668711656442, "grad_norm": 0.24386391043663025, "learning_rate": 6.029338081665059e-08, "loss": 0.46357542276382446, "step": 15955 }, { "epoch": 19.577914110429447, "grad_norm": 0.2578459084033966, "learning_rate": 5.994550694702616e-08, "loss": 0.612402081489563, "step": 15956 }, { "epoch": 19.57914110429448, "grad_norm": 0.2820955514907837, "learning_rate": 5.9598638339866676e-08, "loss": 0.6916606426239014, "step": 15957 }, { "epoch": 19.58036809815951, "grad_norm": 0.23977236449718475, "learning_rate": 5.925277500915538e-08, "loss": 0.4292460083961487, "step": 15958 }, { "epoch": 19.58159509202454, "grad_norm": 0.2951478064060211, "learning_rate": 5.890791696882836e-08, "loss": 0.7307682037353516, "step": 15959 }, { "epoch": 19.58282208588957, "grad_norm": 0.27148646116256714, "learning_rate": 5.856406423279115e-08, "loss": 0.6923516988754272, "step": 15960 }, { "epoch": 19.5840490797546, "grad_norm": 0.2864905893802643, "learning_rate": 5.8221216814902114e-08, "loss": 0.5641052722930908, "step": 15961 }, { "epoch": 19.58527607361963, "grad_norm": 0.2598612904548645, "learning_rate": 5.7879374728977974e-08, "loss": 0.6117309331893921, "step": 15962 }, { "epoch": 19.586503067484664, "grad_norm": 0.23265589773654938, "learning_rate": 5.753853798879938e-08, "loss": 0.3815501928329468, "step": 15963 }, { "epoch": 19.587730061349692, "grad_norm": 0.253229558467865, "learning_rate": 5.7198706608105335e-08, "loss": 0.6089166402816772, "step": 15964 }, { "epoch": 19.588957055214724, "grad_norm": 0.3138175308704376, "learning_rate": 5.685988060059044e-08, "loss": 0.630569577217102, "step": 15965 }, { "epoch": 19.590184049079756, "grad_norm": 0.2473113089799881, "learning_rate": 5.6522059979915996e-08, "loss": 0.5150618553161621, "step": 15966 }, { "epoch": 19.591411042944785, "grad_norm": 0.2767941653728485, "learning_rate": 5.618524475969334e-08, "loss": 0.5117374062538147, "step": 15967 }, { "epoch": 19.592638036809817, "grad_norm": 0.25861138105392456, "learning_rate": 5.5849434953503254e-08, "loss": 0.6653376817703247, "step": 15968 }, { "epoch": 19.593865030674845, "grad_norm": 0.2616503834724426, "learning_rate": 5.551463057487938e-08, "loss": 0.5457661151885986, "step": 15969 }, { "epoch": 19.595092024539877, "grad_norm": 0.25805747509002686, "learning_rate": 5.518083163731647e-08, "loss": 0.6343573331832886, "step": 15970 }, { "epoch": 19.59631901840491, "grad_norm": 0.24960413575172424, "learning_rate": 5.484803815427042e-08, "loss": 0.6431906223297119, "step": 15971 }, { "epoch": 19.597546012269937, "grad_norm": 0.2577416002750397, "learning_rate": 5.4516250139152733e-08, "loss": 0.6427536606788635, "step": 15972 }, { "epoch": 19.59877300613497, "grad_norm": 0.2893083393573761, "learning_rate": 5.4185467605338824e-08, "loss": 0.5637693405151367, "step": 15973 }, { "epoch": 19.6, "grad_norm": 0.2627011835575104, "learning_rate": 5.385569056616246e-08, "loss": 0.2358238697052002, "step": 15974 }, { "epoch": 19.60122699386503, "grad_norm": 0.2792356312274933, "learning_rate": 5.352691903491303e-08, "loss": 0.5992540717124939, "step": 15975 }, { "epoch": 19.602453987730062, "grad_norm": 0.2684832811355591, "learning_rate": 5.31991530248438e-08, "loss": 0.7728331685066223, "step": 15976 }, { "epoch": 19.60368098159509, "grad_norm": 0.2679431140422821, "learning_rate": 5.2872392549166424e-08, "loss": 0.5410866737365723, "step": 15977 }, { "epoch": 19.604907975460122, "grad_norm": 0.2507835626602173, "learning_rate": 5.2546637621050943e-08, "loss": 0.5245778560638428, "step": 15978 }, { "epoch": 19.606134969325154, "grad_norm": 0.2643088698387146, "learning_rate": 5.2221888253628505e-08, "loss": 0.6431630253791809, "step": 15979 }, { "epoch": 19.607361963190183, "grad_norm": 0.2940313518047333, "learning_rate": 5.189814445998864e-08, "loss": 0.6931966543197632, "step": 15980 }, { "epoch": 19.608588957055215, "grad_norm": 0.26726362109184265, "learning_rate": 5.1575406253182026e-08, "loss": 0.6651864051818848, "step": 15981 }, { "epoch": 19.609815950920247, "grad_norm": 0.29355674982070923, "learning_rate": 5.125367364621492e-08, "loss": 0.5984067916870117, "step": 15982 }, { "epoch": 19.611042944785275, "grad_norm": 0.2607671320438385, "learning_rate": 5.0932946652054724e-08, "loss": 0.5616246461868286, "step": 15983 }, { "epoch": 19.612269938650307, "grad_norm": 0.30474963784217834, "learning_rate": 5.0613225283629995e-08, "loss": 0.726374626159668, "step": 15984 }, { "epoch": 19.61349693251534, "grad_norm": 0.25811973214149475, "learning_rate": 5.0294509553830415e-08, "loss": 0.6424156427383423, "step": 15985 }, { "epoch": 19.614723926380368, "grad_norm": 0.2733149528503418, "learning_rate": 4.997679947549849e-08, "loss": 0.5350072383880615, "step": 15986 }, { "epoch": 19.6159509202454, "grad_norm": 0.23136980831623077, "learning_rate": 4.966009506144342e-08, "loss": 0.3736768960952759, "step": 15987 }, { "epoch": 19.617177914110428, "grad_norm": 0.27484381198883057, "learning_rate": 4.934439632443e-08, "loss": 0.6022725105285645, "step": 15988 }, { "epoch": 19.61840490797546, "grad_norm": 0.23771561682224274, "learning_rate": 4.902970327718137e-08, "loss": 0.4629988670349121, "step": 15989 }, { "epoch": 19.619631901840492, "grad_norm": 0.27798375487327576, "learning_rate": 4.871601593238184e-08, "loss": 0.6429271697998047, "step": 15990 }, { "epoch": 19.62085889570552, "grad_norm": 0.28687602281570435, "learning_rate": 4.840333430267685e-08, "loss": 0.6762521266937256, "step": 15991 }, { "epoch": 19.622085889570553, "grad_norm": 0.2383459359407425, "learning_rate": 4.8091658400670203e-08, "loss": 0.5714373588562012, "step": 15992 }, { "epoch": 19.62331288343558, "grad_norm": 0.2682969570159912, "learning_rate": 4.778098823892407e-08, "loss": 0.5539230108261108, "step": 15993 }, { "epoch": 19.624539877300613, "grad_norm": 0.1876458376646042, "learning_rate": 4.7471323829959e-08, "loss": 0.2790374159812927, "step": 15994 }, { "epoch": 19.625766871165645, "grad_norm": 0.30028119683265686, "learning_rate": 4.716266518625945e-08, "loss": 0.7559159398078918, "step": 15995 }, { "epoch": 19.626993865030673, "grad_norm": 0.2864221930503845, "learning_rate": 4.6855012320265456e-08, "loss": 0.6386426687240601, "step": 15996 }, { "epoch": 19.628220858895705, "grad_norm": 0.2376590073108673, "learning_rate": 4.6548365244375446e-08, "loss": 0.40150660276412964, "step": 15997 }, { "epoch": 19.629447852760737, "grad_norm": 0.2645387649536133, "learning_rate": 4.624272397095175e-08, "loss": 0.37160634994506836, "step": 15998 }, { "epoch": 19.630674846625766, "grad_norm": 0.2877919375896454, "learning_rate": 4.593808851231507e-08, "loss": 0.6853649616241455, "step": 15999 }, { "epoch": 19.631901840490798, "grad_norm": 0.30210092663764954, "learning_rate": 4.563445888074169e-08, "loss": 0.7398550510406494, "step": 16000 }, { "epoch": 19.63312883435583, "grad_norm": 0.2839699983596802, "learning_rate": 4.533183508847183e-08, "loss": 0.6766139268875122, "step": 16001 }, { "epoch": 19.63435582822086, "grad_norm": 0.26487016677856445, "learning_rate": 4.5030217147701284e-08, "loss": 0.6392691135406494, "step": 16002 }, { "epoch": 19.63558282208589, "grad_norm": 0.2603174149990082, "learning_rate": 4.472960507058976e-08, "loss": 0.4220722019672394, "step": 16003 }, { "epoch": 19.63680981595092, "grad_norm": 0.28972843289375305, "learning_rate": 4.442999886925536e-08, "loss": 0.7597860097885132, "step": 16004 }, { "epoch": 19.63803680981595, "grad_norm": 0.27056142687797546, "learning_rate": 4.4131398555768976e-08, "loss": 0.7562472820281982, "step": 16005 }, { "epoch": 19.639263803680983, "grad_norm": 0.2781376838684082, "learning_rate": 4.383380414217098e-08, "loss": 0.5332688093185425, "step": 16006 }, { "epoch": 19.64049079754601, "grad_norm": 0.265010267496109, "learning_rate": 4.353721564045454e-08, "loss": 0.5570353865623474, "step": 16007 }, { "epoch": 19.641717791411043, "grad_norm": 0.2824796736240387, "learning_rate": 4.3241633062574006e-08, "loss": 0.6140771508216858, "step": 16008 }, { "epoch": 19.642944785276075, "grad_norm": 0.23544342815876007, "learning_rate": 4.2947056420447606e-08, "loss": 0.3051731288433075, "step": 16009 }, { "epoch": 19.644171779141104, "grad_norm": 0.2712775468826294, "learning_rate": 4.265348572594363e-08, "loss": 0.7392816543579102, "step": 16010 }, { "epoch": 19.645398773006136, "grad_norm": 0.2700348496437073, "learning_rate": 4.236092099089706e-08, "loss": 0.507668137550354, "step": 16011 }, { "epoch": 19.646625766871164, "grad_norm": 0.25249290466308594, "learning_rate": 4.2069362227098454e-08, "loss": 0.5745823383331299, "step": 16012 }, { "epoch": 19.647852760736196, "grad_norm": 0.2548300623893738, "learning_rate": 4.1778809446302304e-08, "loss": 0.665492057800293, "step": 16013 }, { "epoch": 19.649079754601228, "grad_norm": 0.2931409478187561, "learning_rate": 4.1489262660221465e-08, "loss": 0.6510673761367798, "step": 16014 }, { "epoch": 19.650306748466257, "grad_norm": 0.2476225644350052, "learning_rate": 4.120072188052437e-08, "loss": 0.5178371071815491, "step": 16015 }, { "epoch": 19.65153374233129, "grad_norm": 0.25723797082901, "learning_rate": 4.091318711884062e-08, "loss": 0.41661933064460754, "step": 16016 }, { "epoch": 19.65276073619632, "grad_norm": 0.255218505859375, "learning_rate": 4.062665838676094e-08, "loss": 0.6123206615447998, "step": 16017 }, { "epoch": 19.65398773006135, "grad_norm": 0.25502490997314453, "learning_rate": 4.034113569583442e-08, "loss": 0.5211910009384155, "step": 16018 }, { "epoch": 19.65521472392638, "grad_norm": 0.2687593698501587, "learning_rate": 4.005661905756852e-08, "loss": 0.6342282295227051, "step": 16019 }, { "epoch": 19.65644171779141, "grad_norm": 0.2593687176704407, "learning_rate": 3.9773108483431855e-08, "loss": 0.6300607323646545, "step": 16020 }, { "epoch": 19.65766871165644, "grad_norm": 0.2779507339000702, "learning_rate": 3.9490603984854156e-08, "loss": 0.5340242981910706, "step": 16021 }, { "epoch": 19.658895705521473, "grad_norm": 0.2657099962234497, "learning_rate": 3.920910557322077e-08, "loss": 0.5706982612609863, "step": 16022 }, { "epoch": 19.660122699386502, "grad_norm": 0.2637782394886017, "learning_rate": 3.89286132598754e-08, "loss": 0.5678005218505859, "step": 16023 }, { "epoch": 19.661349693251534, "grad_norm": 0.27354493737220764, "learning_rate": 3.864912705612844e-08, "loss": 0.6449894905090332, "step": 16024 }, { "epoch": 19.662576687116566, "grad_norm": 0.2575005888938904, "learning_rate": 3.8370646973243106e-08, "loss": 0.5875341892242432, "step": 16025 }, { "epoch": 19.663803680981594, "grad_norm": 0.26397693157196045, "learning_rate": 3.8093173022443754e-08, "loss": 0.7614321708679199, "step": 16026 }, { "epoch": 19.665030674846626, "grad_norm": 0.2759767472743988, "learning_rate": 3.781670521491587e-08, "loss": 0.5747803449630737, "step": 16027 }, { "epoch": 19.666257668711655, "grad_norm": 0.25943878293037415, "learning_rate": 3.754124356180055e-08, "loss": 0.49038490653038025, "step": 16028 }, { "epoch": 19.667484662576687, "grad_norm": 0.271399587392807, "learning_rate": 3.726678807420558e-08, "loss": 0.6361850500106812, "step": 16029 }, { "epoch": 19.66871165644172, "grad_norm": 0.2906273305416107, "learning_rate": 3.6993338763186004e-08, "loss": 0.6132738590240479, "step": 16030 }, { "epoch": 19.669938650306747, "grad_norm": 0.22629183530807495, "learning_rate": 3.672089563977188e-08, "loss": 0.47655367851257324, "step": 16031 }, { "epoch": 19.67116564417178, "grad_norm": 0.28109127283096313, "learning_rate": 3.6449458714940546e-08, "loss": 0.6552042961120605, "step": 16032 }, { "epoch": 19.67239263803681, "grad_norm": 0.2612967789173126, "learning_rate": 3.617902799963047e-08, "loss": 0.5518936514854431, "step": 16033 }, { "epoch": 19.67361963190184, "grad_norm": 0.24125449359416962, "learning_rate": 3.590960350474681e-08, "loss": 0.37665876746177673, "step": 16034 }, { "epoch": 19.67484662576687, "grad_norm": 0.29064464569091797, "learning_rate": 3.564118524114757e-08, "loss": 0.8693332672119141, "step": 16035 }, { "epoch": 19.6760736196319, "grad_norm": 0.2659071981906891, "learning_rate": 3.537377321965185e-08, "loss": 0.5995357036590576, "step": 16036 }, { "epoch": 19.677300613496932, "grad_norm": 0.3252749741077423, "learning_rate": 3.5107367451037156e-08, "loss": 0.7127843499183655, "step": 16037 }, { "epoch": 19.678527607361964, "grad_norm": 0.2711711525917053, "learning_rate": 3.484196794604211e-08, "loss": 0.4827805161476135, "step": 16038 }, { "epoch": 19.679754601226993, "grad_norm": 0.26385441422462463, "learning_rate": 3.457757471536649e-08, "loss": 0.6378778219223022, "step": 16039 }, { "epoch": 19.680981595092025, "grad_norm": 0.2938792407512665, "learning_rate": 3.431418776966289e-08, "loss": 0.6333194971084595, "step": 16040 }, { "epoch": 19.682208588957057, "grad_norm": 0.27745676040649414, "learning_rate": 3.405180711955058e-08, "loss": 0.6715232133865356, "step": 16041 }, { "epoch": 19.683435582822085, "grad_norm": 0.25622454285621643, "learning_rate": 3.379043277560445e-08, "loss": 0.6189010143280029, "step": 16042 }, { "epoch": 19.684662576687117, "grad_norm": 0.24024777114391327, "learning_rate": 3.3530064748360515e-08, "loss": 0.5002444386482239, "step": 16043 }, { "epoch": 19.68588957055215, "grad_norm": 0.24615056812763214, "learning_rate": 3.3270703048313147e-08, "loss": 0.5957982540130615, "step": 16044 }, { "epoch": 19.687116564417177, "grad_norm": 0.2776491343975067, "learning_rate": 3.3012347685915104e-08, "loss": 0.5292474031448364, "step": 16045 }, { "epoch": 19.68834355828221, "grad_norm": 0.2500789761543274, "learning_rate": 3.2754998671583046e-08, "loss": 0.6083974242210388, "step": 16046 }, { "epoch": 19.689570552147238, "grad_norm": 0.29520708322525024, "learning_rate": 3.249865601568647e-08, "loss": 0.7569975852966309, "step": 16047 }, { "epoch": 19.69079754601227, "grad_norm": 0.27156761288642883, "learning_rate": 3.224331972856154e-08, "loss": 0.773374080657959, "step": 16048 }, { "epoch": 19.692024539877302, "grad_norm": 0.277505487203598, "learning_rate": 3.198898982049725e-08, "loss": 0.7151801586151123, "step": 16049 }, { "epoch": 19.69325153374233, "grad_norm": 0.2487240731716156, "learning_rate": 3.1735666301746514e-08, "loss": 0.6857708692550659, "step": 16050 }, { "epoch": 19.694478527607362, "grad_norm": 0.2689577043056488, "learning_rate": 3.148334918251783e-08, "loss": 0.5517776608467102, "step": 16051 }, { "epoch": 19.69570552147239, "grad_norm": 0.28085383772850037, "learning_rate": 3.123203847298362e-08, "loss": 0.6142587065696716, "step": 16052 }, { "epoch": 19.696932515337423, "grad_norm": 0.2815287411212921, "learning_rate": 3.0981734183274657e-08, "loss": 0.8219236731529236, "step": 16053 }, { "epoch": 19.698159509202455, "grad_norm": 0.2464151233434677, "learning_rate": 3.073243632347456e-08, "loss": 0.5621984601020813, "step": 16054 }, { "epoch": 19.699386503067483, "grad_norm": 0.27578023076057434, "learning_rate": 3.0484144903639156e-08, "loss": 0.6351155638694763, "step": 16055 }, { "epoch": 19.700613496932515, "grad_norm": 0.27246275544166565, "learning_rate": 3.023685993376879e-08, "loss": 0.5692144632339478, "step": 16056 }, { "epoch": 19.701840490797547, "grad_norm": 0.29104340076446533, "learning_rate": 2.999058142383604e-08, "loss": 0.6804317831993103, "step": 16057 }, { "epoch": 19.703067484662576, "grad_norm": 0.2483447641134262, "learning_rate": 2.974530938376907e-08, "loss": 0.368174284696579, "step": 16058 }, { "epoch": 19.704294478527608, "grad_norm": 0.2847994565963745, "learning_rate": 2.950104382344887e-08, "loss": 0.6415199637413025, "step": 16059 }, { "epoch": 19.70552147239264, "grad_norm": 0.2714451849460602, "learning_rate": 2.9257784752723118e-08, "loss": 0.5773791074752808, "step": 16060 }, { "epoch": 19.706748466257668, "grad_norm": 0.2644261419773102, "learning_rate": 2.9015532181397853e-08, "loss": 0.6783016920089722, "step": 16061 }, { "epoch": 19.7079754601227, "grad_norm": 0.29063233733177185, "learning_rate": 2.8774286119234716e-08, "loss": 0.6394919157028198, "step": 16062 }, { "epoch": 19.70920245398773, "grad_norm": 0.24129268527030945, "learning_rate": 2.8534046575964812e-08, "loss": 0.5730476379394531, "step": 16063 }, { "epoch": 19.71042944785276, "grad_norm": 0.25690650939941406, "learning_rate": 2.8294813561263734e-08, "loss": 0.6591860055923462, "step": 16064 }, { "epoch": 19.711656441717793, "grad_norm": 0.27134397625923157, "learning_rate": 2.805658708477654e-08, "loss": 0.5581389665603638, "step": 16065 }, { "epoch": 19.71288343558282, "grad_norm": 0.28178083896636963, "learning_rate": 2.7819367156106667e-08, "loss": 0.6799309253692627, "step": 16066 }, { "epoch": 19.714110429447853, "grad_norm": 0.28005310893058777, "learning_rate": 2.7583153784815908e-08, "loss": 0.7239497900009155, "step": 16067 }, { "epoch": 19.715337423312885, "grad_norm": 0.27341434359550476, "learning_rate": 2.734794698042442e-08, "loss": 0.5189157724380493, "step": 16068 }, { "epoch": 19.716564417177914, "grad_norm": 0.2786354422569275, "learning_rate": 2.711374675241074e-08, "loss": 0.7233799695968628, "step": 16069 }, { "epoch": 19.717791411042946, "grad_norm": 0.27927231788635254, "learning_rate": 2.6880553110220087e-08, "loss": 0.6193320751190186, "step": 16070 }, { "epoch": 19.719018404907974, "grad_norm": 0.2907028794288635, "learning_rate": 2.664836606324772e-08, "loss": 0.6454459428787231, "step": 16071 }, { "epoch": 19.720245398773006, "grad_norm": 0.32196488976478577, "learning_rate": 2.6417185620852823e-08, "loss": 0.5972232818603516, "step": 16072 }, { "epoch": 19.721472392638038, "grad_norm": 0.2817733585834503, "learning_rate": 2.618701179235572e-08, "loss": 0.5001744031906128, "step": 16073 }, { "epoch": 19.722699386503066, "grad_norm": 0.25485125184059143, "learning_rate": 2.595784458703232e-08, "loss": 0.5342998504638672, "step": 16074 }, { "epoch": 19.7239263803681, "grad_norm": 0.2412424385547638, "learning_rate": 2.5729684014119683e-08, "loss": 0.4952559471130371, "step": 16075 }, { "epoch": 19.72515337423313, "grad_norm": 0.3065892159938812, "learning_rate": 2.5502530082813226e-08, "loss": 0.6449559330940247, "step": 16076 }, { "epoch": 19.72638036809816, "grad_norm": 0.29340454936027527, "learning_rate": 2.5276382802272292e-08, "loss": 0.5114918351173401, "step": 16077 }, { "epoch": 19.72760736196319, "grad_norm": 0.26721706986427307, "learning_rate": 2.5051242181609037e-08, "loss": 0.4897948205471039, "step": 16078 }, { "epoch": 19.72883435582822, "grad_norm": 0.2524581849575043, "learning_rate": 2.482710822989953e-08, "loss": 0.5690096616744995, "step": 16079 }, { "epoch": 19.73006134969325, "grad_norm": 0.2821669578552246, "learning_rate": 2.4603980956178218e-08, "loss": 0.8166283965110779, "step": 16080 }, { "epoch": 19.731288343558283, "grad_norm": 0.2750683128833771, "learning_rate": 2.4381860369437905e-08, "loss": 0.5883926153182983, "step": 16081 }, { "epoch": 19.73251533742331, "grad_norm": 0.29478567838668823, "learning_rate": 2.4160746478632536e-08, "loss": 0.6610470414161682, "step": 16082 }, { "epoch": 19.733742331288344, "grad_norm": 0.26517975330352783, "learning_rate": 2.3940639292674427e-08, "loss": 0.7135617136955261, "step": 16083 }, { "epoch": 19.734969325153376, "grad_norm": 0.23761983215808868, "learning_rate": 2.3721538820434264e-08, "loss": 0.4073242247104645, "step": 16084 }, { "epoch": 19.736196319018404, "grad_norm": 0.27607834339141846, "learning_rate": 2.3503445070746645e-08, "loss": 0.569398820400238, "step": 16085 }, { "epoch": 19.737423312883436, "grad_norm": 0.25899583101272583, "learning_rate": 2.3286358052398983e-08, "loss": 0.5767850875854492, "step": 16086 }, { "epoch": 19.738650306748465, "grad_norm": 0.30660712718963623, "learning_rate": 2.3070277774139836e-08, "loss": 0.6670733094215393, "step": 16087 }, { "epoch": 19.739877300613497, "grad_norm": 0.24769040942192078, "learning_rate": 2.285520424468446e-08, "loss": 0.5839700698852539, "step": 16088 }, { "epoch": 19.74110429447853, "grad_norm": 0.29058071970939636, "learning_rate": 2.2641137472698136e-08, "loss": 0.5117849707603455, "step": 16089 }, { "epoch": 19.742331288343557, "grad_norm": 0.2758978307247162, "learning_rate": 2.2428077466807307e-08, "loss": 0.612994909286499, "step": 16090 }, { "epoch": 19.74355828220859, "grad_norm": 0.2502864599227905, "learning_rate": 2.2216024235605092e-08, "loss": 0.49561208486557007, "step": 16091 }, { "epoch": 19.74478527607362, "grad_norm": 0.2722310423851013, "learning_rate": 2.2004977787634663e-08, "loss": 0.46982502937316895, "step": 16092 }, { "epoch": 19.74601226993865, "grad_norm": 0.3121661841869354, "learning_rate": 2.1794938131405873e-08, "loss": 0.6763758063316345, "step": 16093 }, { "epoch": 19.74723926380368, "grad_norm": 0.2213575690984726, "learning_rate": 2.15859052753814e-08, "loss": 0.5490544438362122, "step": 16094 }, { "epoch": 19.74846625766871, "grad_norm": 0.23200438916683197, "learning_rate": 2.137787922798784e-08, "loss": 0.5354525446891785, "step": 16095 }, { "epoch": 19.749693251533742, "grad_norm": 0.26940786838531494, "learning_rate": 2.1170859997612925e-08, "loss": 0.5801250338554382, "step": 16096 }, { "epoch": 19.750920245398774, "grad_norm": 0.25600069761276245, "learning_rate": 2.0964847592597204e-08, "loss": 0.5844970345497131, "step": 16097 }, { "epoch": 19.752147239263802, "grad_norm": 0.2807966470718384, "learning_rate": 2.0759842021247922e-08, "loss": 0.5886245369911194, "step": 16098 }, { "epoch": 19.753374233128834, "grad_norm": 0.3036273717880249, "learning_rate": 2.055584329182514e-08, "loss": 0.41089171171188354, "step": 16099 }, { "epoch": 19.754601226993866, "grad_norm": 0.2875816524028778, "learning_rate": 2.0352851412552832e-08, "loss": 0.6999335289001465, "step": 16100 }, { "epoch": 19.755828220858895, "grad_norm": 0.2529551684856415, "learning_rate": 2.0150866391613345e-08, "loss": 0.6482492685317993, "step": 16101 }, { "epoch": 19.757055214723927, "grad_norm": 0.25869661569595337, "learning_rate": 1.9949888237147385e-08, "loss": 0.5927771329879761, "step": 16102 }, { "epoch": 19.758282208588955, "grad_norm": 0.2676050662994385, "learning_rate": 1.9749916957254034e-08, "loss": 0.4569295346736908, "step": 16103 }, { "epoch": 19.759509202453987, "grad_norm": 0.24169577658176422, "learning_rate": 1.9550952559996284e-08, "loss": 0.4544190764427185, "step": 16104 }, { "epoch": 19.76073619631902, "grad_norm": 0.2864413857460022, "learning_rate": 1.9352995053395494e-08, "loss": 0.6269335746765137, "step": 16105 }, { "epoch": 19.761963190184048, "grad_norm": 0.2390204519033432, "learning_rate": 1.9156044445428624e-08, "loss": 0.5719627141952515, "step": 16106 }, { "epoch": 19.76319018404908, "grad_norm": 0.24103528261184692, "learning_rate": 1.8960100744030983e-08, "loss": 0.5354752540588379, "step": 16107 }, { "epoch": 19.764417177914112, "grad_norm": 0.28023040294647217, "learning_rate": 1.8765163957104593e-08, "loss": 0.6185349225997925, "step": 16108 }, { "epoch": 19.76564417177914, "grad_norm": 0.26762810349464417, "learning_rate": 1.8571234092507052e-08, "loss": 0.6955153942108154, "step": 16109 }, { "epoch": 19.766871165644172, "grad_norm": 0.26790452003479004, "learning_rate": 1.8378311158051554e-08, "loss": 0.6795819997787476, "step": 16110 }, { "epoch": 19.7680981595092, "grad_norm": 0.27976611256599426, "learning_rate": 1.8186395161520764e-08, "loss": 0.6870114207267761, "step": 16111 }, { "epoch": 19.769325153374233, "grad_norm": 0.3037409484386444, "learning_rate": 1.7995486110641835e-08, "loss": 0.5843106508255005, "step": 16112 }, { "epoch": 19.770552147239265, "grad_norm": 0.29687419533729553, "learning_rate": 1.780558401311416e-08, "loss": 0.581790566444397, "step": 16113 }, { "epoch": 19.771779141104293, "grad_norm": 0.24483460187911987, "learning_rate": 1.7616688876592734e-08, "loss": 0.567284107208252, "step": 16114 }, { "epoch": 19.773006134969325, "grad_norm": 0.25496798753738403, "learning_rate": 1.7428800708693683e-08, "loss": 0.6368364095687866, "step": 16115 }, { "epoch": 19.774233128834357, "grad_norm": 0.25047779083251953, "learning_rate": 1.7241919516983175e-08, "loss": 0.5190349817276001, "step": 16116 }, { "epoch": 19.775460122699386, "grad_norm": 0.28317609429359436, "learning_rate": 1.7056045308999623e-08, "loss": 0.601601243019104, "step": 16117 }, { "epoch": 19.776687116564418, "grad_norm": 0.2964201867580414, "learning_rate": 1.687117809223149e-08, "loss": 0.6052480340003967, "step": 16118 }, { "epoch": 19.77791411042945, "grad_norm": 0.2601813077926636, "learning_rate": 1.6687317874133913e-08, "loss": 0.5624167919158936, "step": 16119 }, { "epoch": 19.779141104294478, "grad_norm": 0.2339327335357666, "learning_rate": 1.650446466211486e-08, "loss": 0.49464109539985657, "step": 16120 }, { "epoch": 19.78036809815951, "grad_norm": 0.2931138873100281, "learning_rate": 1.6322618463546212e-08, "loss": 0.7361763715744019, "step": 16121 }, { "epoch": 19.78159509202454, "grad_norm": 0.28705254197120667, "learning_rate": 1.6141779285758217e-08, "loss": 0.6358294486999512, "step": 16122 }, { "epoch": 19.78282208588957, "grad_norm": 0.25853028893470764, "learning_rate": 1.5961947136036715e-08, "loss": 0.5759102702140808, "step": 16123 }, { "epoch": 19.784049079754602, "grad_norm": 0.2429991215467453, "learning_rate": 1.5783122021634233e-08, "loss": 0.5402126312255859, "step": 16124 }, { "epoch": 19.78527607361963, "grad_norm": 0.2798876464366913, "learning_rate": 1.5605303949756124e-08, "loss": 0.5620956420898438, "step": 16125 }, { "epoch": 19.786503067484663, "grad_norm": 0.26559048891067505, "learning_rate": 1.5428492927571648e-08, "loss": 0.4874908924102783, "step": 16126 }, { "epoch": 19.787730061349695, "grad_norm": 0.24992987513542175, "learning_rate": 1.5252688962202886e-08, "loss": 0.5381543040275574, "step": 16127 }, { "epoch": 19.788957055214723, "grad_norm": 0.26071953773498535, "learning_rate": 1.5077892060741394e-08, "loss": 0.5717687010765076, "step": 16128 }, { "epoch": 19.790184049079755, "grad_norm": 0.23289570212364197, "learning_rate": 1.490410223023153e-08, "loss": 0.5691318511962891, "step": 16129 }, { "epoch": 19.791411042944784, "grad_norm": 0.3143908679485321, "learning_rate": 1.4731319477678806e-08, "loss": 0.7563022375106812, "step": 16130 }, { "epoch": 19.792638036809816, "grad_norm": 0.2691217362880707, "learning_rate": 1.4559543810044318e-08, "loss": 0.5809985995292664, "step": 16131 }, { "epoch": 19.793865030674848, "grad_norm": 0.2620144784450531, "learning_rate": 1.4388775234255857e-08, "loss": 0.6333280205726624, "step": 16132 }, { "epoch": 19.795092024539876, "grad_norm": 0.31869399547576904, "learning_rate": 1.4219013757191257e-08, "loss": 0.47401362657546997, "step": 16133 }, { "epoch": 19.79631901840491, "grad_norm": 0.2688331604003906, "learning_rate": 1.4050259385700593e-08, "loss": 0.725259006023407, "step": 16134 }, { "epoch": 19.79754601226994, "grad_norm": 0.27281084656715393, "learning_rate": 1.3882512126578429e-08, "loss": 0.8627501726150513, "step": 16135 }, { "epoch": 19.79877300613497, "grad_norm": 0.24760672450065613, "learning_rate": 1.3715771986591575e-08, "loss": 0.5549024343490601, "step": 16136 }, { "epoch": 19.8, "grad_norm": 0.2531871497631073, "learning_rate": 1.355003897245688e-08, "loss": 0.4592863619327545, "step": 16137 }, { "epoch": 19.80122699386503, "grad_norm": 0.2760011553764343, "learning_rate": 1.3385313090857887e-08, "loss": 0.5727841258049011, "step": 16138 }, { "epoch": 19.80245398773006, "grad_norm": 0.2407931238412857, "learning_rate": 1.322159434843373e-08, "loss": 0.2982398271560669, "step": 16139 }, { "epoch": 19.803680981595093, "grad_norm": 0.26291385293006897, "learning_rate": 1.3058882751781909e-08, "loss": 0.7060896158218384, "step": 16140 }, { "epoch": 19.80490797546012, "grad_norm": 0.27488279342651367, "learning_rate": 1.2897178307461067e-08, "loss": 0.44591644406318665, "step": 16141 }, { "epoch": 19.806134969325154, "grad_norm": 0.2684621810913086, "learning_rate": 1.2736481021990987e-08, "loss": 0.5543627142906189, "step": 16142 }, { "epoch": 19.807361963190186, "grad_norm": 0.26200249791145325, "learning_rate": 1.257679090184427e-08, "loss": 0.5632297396659851, "step": 16143 }, { "epoch": 19.808588957055214, "grad_norm": 0.34392958879470825, "learning_rate": 1.2418107953462988e-08, "loss": 0.5739174485206604, "step": 16144 }, { "epoch": 19.809815950920246, "grad_norm": 0.27134230732917786, "learning_rate": 1.2260432183242021e-08, "loss": 0.5653071403503418, "step": 16145 }, { "epoch": 19.811042944785274, "grad_norm": 0.2700212299823761, "learning_rate": 1.2103763597534622e-08, "loss": 0.6311249136924744, "step": 16146 }, { "epoch": 19.812269938650306, "grad_norm": 0.3041534423828125, "learning_rate": 1.1948102202655187e-08, "loss": 0.553008496761322, "step": 16147 }, { "epoch": 19.81349693251534, "grad_norm": 0.2412063181400299, "learning_rate": 1.1793448004882024e-08, "loss": 0.34542107582092285, "step": 16148 }, { "epoch": 19.814723926380367, "grad_norm": 0.29806700348854065, "learning_rate": 1.163980101044626e-08, "loss": 0.6542364358901978, "step": 16149 }, { "epoch": 19.8159509202454, "grad_norm": 0.2696397006511688, "learning_rate": 1.1487161225540167e-08, "loss": 0.6095598936080933, "step": 16150 }, { "epoch": 19.81717791411043, "grad_norm": 0.24587808549404144, "learning_rate": 1.1335528656317151e-08, "loss": 0.5262739658355713, "step": 16151 }, { "epoch": 19.81840490797546, "grad_norm": 0.2562694251537323, "learning_rate": 1.1184903308888995e-08, "loss": 0.7235836982727051, "step": 16152 }, { "epoch": 19.81963190184049, "grad_norm": 0.26316675543785095, "learning_rate": 1.1035285189325839e-08, "loss": 0.685309886932373, "step": 16153 }, { "epoch": 19.82085889570552, "grad_norm": 0.23587004840373993, "learning_rate": 1.0886674303661749e-08, "loss": 0.4574720859527588, "step": 16154 }, { "epoch": 19.822085889570552, "grad_norm": 0.2571620047092438, "learning_rate": 1.0739070657883598e-08, "loss": 0.5842510461807251, "step": 16155 }, { "epoch": 19.823312883435584, "grad_norm": 0.29056602716445923, "learning_rate": 1.0592474257942186e-08, "loss": 0.657039999961853, "step": 16156 }, { "epoch": 19.824539877300612, "grad_norm": 0.2734782099723816, "learning_rate": 1.0446885109746673e-08, "loss": 0.5873494148254395, "step": 16157 }, { "epoch": 19.825766871165644, "grad_norm": 0.30578967928886414, "learning_rate": 1.0302303219161813e-08, "loss": 0.6396012306213379, "step": 16158 }, { "epoch": 19.826993865030676, "grad_norm": 0.2994247078895569, "learning_rate": 1.0158728592021826e-08, "loss": 0.4040101170539856, "step": 16159 }, { "epoch": 19.828220858895705, "grad_norm": 0.29394763708114624, "learning_rate": 1.00161612341082e-08, "loss": 0.5635397434234619, "step": 16160 }, { "epoch": 19.829447852760737, "grad_norm": 0.2679688036441803, "learning_rate": 9.874601151171892e-09, "loss": 0.5635303854942322, "step": 16161 }, { "epoch": 19.830674846625765, "grad_norm": 0.25002577900886536, "learning_rate": 9.734048348913893e-09, "loss": 0.40179643034935, "step": 16162 }, { "epoch": 19.831901840490797, "grad_norm": 0.23891015350818634, "learning_rate": 9.594502833001895e-09, "loss": 0.5011698603630066, "step": 16163 }, { "epoch": 19.83312883435583, "grad_norm": 0.2804563641548157, "learning_rate": 9.455964609061951e-09, "loss": 0.3717347979545593, "step": 16164 }, { "epoch": 19.834355828220858, "grad_norm": 0.28568193316459656, "learning_rate": 9.318433682678484e-09, "loss": 0.7565097808837891, "step": 16165 }, { "epoch": 19.83558282208589, "grad_norm": 0.24735027551651, "learning_rate": 9.181910059388733e-09, "loss": 0.5839478969573975, "step": 16166 }, { "epoch": 19.83680981595092, "grad_norm": 0.2715287208557129, "learning_rate": 9.046393744702176e-09, "loss": 0.647666335105896, "step": 16167 }, { "epoch": 19.83803680981595, "grad_norm": 0.268315851688385, "learning_rate": 8.911884744081112e-09, "loss": 0.5426980257034302, "step": 16168 }, { "epoch": 19.839263803680982, "grad_norm": 0.25851765275001526, "learning_rate": 8.778383062943429e-09, "loss": 0.38326501846313477, "step": 16169 }, { "epoch": 19.84049079754601, "grad_norm": 0.2651871144771576, "learning_rate": 8.645888706670935e-09, "loss": 0.5296850800514221, "step": 16170 }, { "epoch": 19.841717791411043, "grad_norm": 0.2683587670326233, "learning_rate": 8.514401680606577e-09, "loss": 0.6749495267868042, "step": 16171 }, { "epoch": 19.842944785276075, "grad_norm": 0.29069486260414124, "learning_rate": 8.383921990048892e-09, "loss": 0.7353404760360718, "step": 16172 }, { "epoch": 19.844171779141103, "grad_norm": 0.2755039930343628, "learning_rate": 8.254449640257567e-09, "loss": 0.6960476040840149, "step": 16173 }, { "epoch": 19.845398773006135, "grad_norm": 0.26910510659217834, "learning_rate": 8.12598463644787e-09, "loss": 0.571635365486145, "step": 16174 }, { "epoch": 19.846625766871167, "grad_norm": 0.2472212314605713, "learning_rate": 7.998526983801768e-09, "loss": 0.4922823905944824, "step": 16175 }, { "epoch": 19.847852760736195, "grad_norm": 0.2671893537044525, "learning_rate": 7.872076687456819e-09, "loss": 0.633426308631897, "step": 16176 }, { "epoch": 19.849079754601227, "grad_norm": 0.2767641842365265, "learning_rate": 7.746633752508948e-09, "loss": 0.6906798481941223, "step": 16177 }, { "epoch": 19.85030674846626, "grad_norm": 0.24988150596618652, "learning_rate": 7.62219818401244e-09, "loss": 0.46294450759887695, "step": 16178 }, { "epoch": 19.851533742331288, "grad_norm": 0.25956326723098755, "learning_rate": 7.49876998698551e-09, "loss": 0.5465369820594788, "step": 16179 }, { "epoch": 19.85276073619632, "grad_norm": 0.29296278953552246, "learning_rate": 7.376349166401952e-09, "loss": 0.5256083011627197, "step": 16180 }, { "epoch": 19.85398773006135, "grad_norm": 0.2679578363895416, "learning_rate": 7.25493572719671e-09, "loss": 0.7635791301727295, "step": 16181 }, { "epoch": 19.85521472392638, "grad_norm": 0.2297963947057724, "learning_rate": 7.1345296742630905e-09, "loss": 0.5203582644462585, "step": 16182 }, { "epoch": 19.856441717791412, "grad_norm": 0.2883889973163605, "learning_rate": 7.015131012455545e-09, "loss": 0.7470743060112, "step": 16183 }, { "epoch": 19.85766871165644, "grad_norm": 0.27914828062057495, "learning_rate": 6.8967397465868884e-09, "loss": 0.818249523639679, "step": 16184 }, { "epoch": 19.858895705521473, "grad_norm": 0.24401293694972992, "learning_rate": 6.779355881425531e-09, "loss": 0.4289743900299072, "step": 16185 }, { "epoch": 19.860122699386505, "grad_norm": 0.3110334575176239, "learning_rate": 6.6629794217065724e-09, "loss": 0.6972857117652893, "step": 16186 }, { "epoch": 19.861349693251533, "grad_norm": 0.2281228005886078, "learning_rate": 6.547610372120705e-09, "loss": 0.43519943952560425, "step": 16187 }, { "epoch": 19.862576687116565, "grad_norm": 0.24871395528316498, "learning_rate": 6.433248737316988e-09, "loss": 0.5509378910064697, "step": 16188 }, { "epoch": 19.863803680981594, "grad_norm": 0.26733362674713135, "learning_rate": 6.319894521902847e-09, "loss": 0.5738027691841125, "step": 16189 }, { "epoch": 19.865030674846626, "grad_norm": 0.3002522885799408, "learning_rate": 6.207547730452401e-09, "loss": 0.3993024528026581, "step": 16190 }, { "epoch": 19.866257668711658, "grad_norm": 0.3009348511695862, "learning_rate": 6.096208367489808e-09, "loss": 0.7532112002372742, "step": 16191 }, { "epoch": 19.867484662576686, "grad_norm": 0.25022900104522705, "learning_rate": 5.9858764375031465e-09, "loss": 0.6427910327911377, "step": 16192 }, { "epoch": 19.868711656441718, "grad_norm": 0.2702728807926178, "learning_rate": 5.876551944941633e-09, "loss": 0.5427796840667725, "step": 16193 }, { "epoch": 19.86993865030675, "grad_norm": 0.25946158170700073, "learning_rate": 5.7682348942100785e-09, "loss": 0.6330570578575134, "step": 16194 }, { "epoch": 19.87116564417178, "grad_norm": 0.24378158152103424, "learning_rate": 5.66092528967721e-09, "loss": 0.49621766805648804, "step": 16195 }, { "epoch": 19.87239263803681, "grad_norm": 0.249457448720932, "learning_rate": 5.554623135664572e-09, "loss": 0.4742406904697418, "step": 16196 }, { "epoch": 19.87361963190184, "grad_norm": 0.22787180542945862, "learning_rate": 5.449328436460399e-09, "loss": 0.5120702981948853, "step": 16197 }, { "epoch": 19.87484662576687, "grad_norm": 0.3254927694797516, "learning_rate": 5.345041196305744e-09, "loss": 0.7485535144805908, "step": 16198 }, { "epoch": 19.876073619631903, "grad_norm": 0.2474086582660675, "learning_rate": 5.241761419405578e-09, "loss": 0.5528963208198547, "step": 16199 }, { "epoch": 19.87730061349693, "grad_norm": 0.31348666548728943, "learning_rate": 5.139489109923234e-09, "loss": 0.7659933567047119, "step": 16200 }, { "epoch": 19.878527607361963, "grad_norm": 0.31444302201271057, "learning_rate": 5.038224271980419e-09, "loss": 0.674436092376709, "step": 16201 }, { "epoch": 19.879754601226995, "grad_norm": 0.2814277112483978, "learning_rate": 4.937966909657199e-09, "loss": 0.6168652772903442, "step": 16202 }, { "epoch": 19.880981595092024, "grad_norm": 0.29662981629371643, "learning_rate": 4.838717026997563e-09, "loss": 0.5741356015205383, "step": 16203 }, { "epoch": 19.882208588957056, "grad_norm": 0.25058355927467346, "learning_rate": 4.74047462800109e-09, "loss": 0.5623442530632019, "step": 16204 }, { "epoch": 19.883435582822084, "grad_norm": 0.3032751977443695, "learning_rate": 4.6432397166285e-09, "loss": 0.7177292108535767, "step": 16205 }, { "epoch": 19.884662576687116, "grad_norm": 0.2613092064857483, "learning_rate": 4.547012296796105e-09, "loss": 0.705600380897522, "step": 16206 }, { "epoch": 19.88588957055215, "grad_norm": 0.25566813349723816, "learning_rate": 4.4517923723841335e-09, "loss": 0.5601306557655334, "step": 16207 }, { "epoch": 19.887116564417177, "grad_norm": 0.23225629329681396, "learning_rate": 4.357579947233959e-09, "loss": 0.539310097694397, "step": 16208 }, { "epoch": 19.88834355828221, "grad_norm": 0.26849034428596497, "learning_rate": 4.26437502513699e-09, "loss": 0.6267234683036804, "step": 16209 }, { "epoch": 19.88957055214724, "grad_norm": 0.29642733931541443, "learning_rate": 4.172177609854111e-09, "loss": 0.7983720302581787, "step": 16210 }, { "epoch": 19.89079754601227, "grad_norm": 0.3397233784198761, "learning_rate": 4.080987705099016e-09, "loss": 0.4387090802192688, "step": 16211 }, { "epoch": 19.8920245398773, "grad_norm": 0.24321438372135162, "learning_rate": 3.990805314549318e-09, "loss": 0.44809988141059875, "step": 16212 }, { "epoch": 19.89325153374233, "grad_norm": 0.3020300567150116, "learning_rate": 3.901630441840998e-09, "loss": 0.6333901882171631, "step": 16213 }, { "epoch": 19.89447852760736, "grad_norm": 0.23879368603229523, "learning_rate": 3.8134630905656274e-09, "loss": 0.3145845830440521, "step": 16214 }, { "epoch": 19.895705521472394, "grad_norm": 0.281195729970932, "learning_rate": 3.726303264278696e-09, "loss": 0.4267631769180298, "step": 16215 }, { "epoch": 19.896932515337422, "grad_norm": 0.2915148437023163, "learning_rate": 3.6401509664912848e-09, "loss": 0.7021991014480591, "step": 16216 }, { "epoch": 19.898159509202454, "grad_norm": 0.2826932966709137, "learning_rate": 3.5550062006811656e-09, "loss": 0.6565602421760559, "step": 16217 }, { "epoch": 19.899386503067486, "grad_norm": 0.2678978443145752, "learning_rate": 3.4708689702733776e-09, "loss": 0.49907374382019043, "step": 16218 }, { "epoch": 19.900613496932515, "grad_norm": 0.2627774178981781, "learning_rate": 3.3877392786624273e-09, "loss": 0.5407178401947021, "step": 16219 }, { "epoch": 19.901840490797547, "grad_norm": 0.28935369849205017, "learning_rate": 3.3056171292011882e-09, "loss": 0.6748589873313904, "step": 16220 }, { "epoch": 19.903067484662575, "grad_norm": 0.27842453122138977, "learning_rate": 3.2245025251953496e-09, "loss": 0.7646443843841553, "step": 16221 }, { "epoch": 19.904294478527607, "grad_norm": 0.2733169496059418, "learning_rate": 3.1443954699200694e-09, "loss": 0.6724434494972229, "step": 16222 }, { "epoch": 19.90552147239264, "grad_norm": 0.29514604806900024, "learning_rate": 3.06529596659777e-09, "loss": 0.5396702289581299, "step": 16223 }, { "epoch": 19.906748466257667, "grad_norm": 0.2790028750896454, "learning_rate": 2.987204018420342e-09, "loss": 0.736526608467102, "step": 16224 }, { "epoch": 19.9079754601227, "grad_norm": 0.2966996729373932, "learning_rate": 2.9101196285352684e-09, "loss": 0.8032228946685791, "step": 16225 }, { "epoch": 19.90920245398773, "grad_norm": 0.24839043617248535, "learning_rate": 2.834042800051173e-09, "loss": 0.37957412004470825, "step": 16226 }, { "epoch": 19.91042944785276, "grad_norm": 0.2736720144748688, "learning_rate": 2.7589735360322724e-09, "loss": 0.7692750096321106, "step": 16227 }, { "epoch": 19.911656441717792, "grad_norm": 0.252001017332077, "learning_rate": 2.684911839503923e-09, "loss": 0.5340638160705566, "step": 16228 }, { "epoch": 19.91288343558282, "grad_norm": 0.26210591197013855, "learning_rate": 2.6118577134498503e-09, "loss": 0.5110554695129395, "step": 16229 }, { "epoch": 19.914110429447852, "grad_norm": 0.30329135060310364, "learning_rate": 2.5398111608204713e-09, "loss": 0.7488691806793213, "step": 16230 }, { "epoch": 19.915337423312884, "grad_norm": 0.28727856278419495, "learning_rate": 2.4687721845162437e-09, "loss": 0.6793022155761719, "step": 16231 }, { "epoch": 19.916564417177913, "grad_norm": 0.2557753026485443, "learning_rate": 2.3987407873987675e-09, "loss": 0.4969423711299896, "step": 16232 }, { "epoch": 19.917791411042945, "grad_norm": 0.25132516026496887, "learning_rate": 2.329716972293561e-09, "loss": 0.5655540823936462, "step": 16233 }, { "epoch": 19.919018404907977, "grad_norm": 0.3251507580280304, "learning_rate": 2.2617007419817313e-09, "loss": 0.7593796253204346, "step": 16234 }, { "epoch": 19.920245398773005, "grad_norm": 0.2656572759151459, "learning_rate": 2.1946920992027553e-09, "loss": 0.6801402568817139, "step": 16235 }, { "epoch": 19.921472392638037, "grad_norm": 0.26032915711402893, "learning_rate": 2.1286910466628006e-09, "loss": 0.5409698486328125, "step": 16236 }, { "epoch": 19.92269938650307, "grad_norm": 0.25649771094322205, "learning_rate": 2.0636975870180764e-09, "loss": 0.42548850178718567, "step": 16237 }, { "epoch": 19.923926380368098, "grad_norm": 0.28882917761802673, "learning_rate": 1.999711722891484e-09, "loss": 0.6696641445159912, "step": 16238 }, { "epoch": 19.92515337423313, "grad_norm": 0.25472167134284973, "learning_rate": 1.936733456855966e-09, "loss": 0.5762321949005127, "step": 16239 }, { "epoch": 19.926380368098158, "grad_norm": 0.24897408485412598, "learning_rate": 1.8747627914567077e-09, "loss": 0.606791615486145, "step": 16240 }, { "epoch": 19.92760736196319, "grad_norm": 0.30643516778945923, "learning_rate": 1.8137997291889363e-09, "loss": 0.605108380317688, "step": 16241 }, { "epoch": 19.928834355828222, "grad_norm": 0.3065126836299896, "learning_rate": 1.7538442725090198e-09, "loss": 0.6616383790969849, "step": 16242 }, { "epoch": 19.93006134969325, "grad_norm": 0.27115145325660706, "learning_rate": 1.694896423834469e-09, "loss": 0.7380319237709045, "step": 16243 }, { "epoch": 19.931288343558283, "grad_norm": 0.2318820357322693, "learning_rate": 1.6369561855411608e-09, "loss": 0.47997498512268066, "step": 16244 }, { "epoch": 19.93251533742331, "grad_norm": 0.2823431193828583, "learning_rate": 1.580023559966115e-09, "loss": 0.7115047574043274, "step": 16245 }, { "epoch": 19.933742331288343, "grad_norm": 0.30200645327568054, "learning_rate": 1.5240985493991667e-09, "loss": 0.7860561609268188, "step": 16246 }, { "epoch": 19.934969325153375, "grad_norm": 0.2997346520423889, "learning_rate": 1.4691811560996193e-09, "loss": 0.6527193784713745, "step": 16247 }, { "epoch": 19.936196319018403, "grad_norm": 0.24736203253269196, "learning_rate": 1.4152713822795927e-09, "loss": 0.5359556674957275, "step": 16248 }, { "epoch": 19.937423312883435, "grad_norm": 0.26944491267204285, "learning_rate": 1.362369230112348e-09, "loss": 0.5682604312896729, "step": 16249 }, { "epoch": 19.938650306748468, "grad_norm": 0.2371739149093628, "learning_rate": 1.3104747017295139e-09, "loss": 0.5160844326019287, "step": 16250 }, { "epoch": 19.939877300613496, "grad_norm": 0.28414422273635864, "learning_rate": 1.2595877992238603e-09, "loss": 0.6134630441665649, "step": 16251 }, { "epoch": 19.941104294478528, "grad_norm": 0.2545883059501648, "learning_rate": 1.209708524643749e-09, "loss": 0.3812911808490753, "step": 16252 }, { "epoch": 19.94233128834356, "grad_norm": 0.24534358084201813, "learning_rate": 1.160836880001459e-09, "loss": 0.4503448009490967, "step": 16253 }, { "epoch": 19.94355828220859, "grad_norm": 0.3093093931674957, "learning_rate": 1.1129728672676364e-09, "loss": 0.614890456199646, "step": 16254 }, { "epoch": 19.94478527607362, "grad_norm": 0.2969963550567627, "learning_rate": 1.0661164883712937e-09, "loss": 0.6617600917816162, "step": 16255 }, { "epoch": 19.94601226993865, "grad_norm": 0.24619616568088531, "learning_rate": 1.02026774519981e-09, "loss": 0.61571204662323, "step": 16256 }, { "epoch": 19.94723926380368, "grad_norm": 0.2435264140367508, "learning_rate": 9.754266396017065e-10, "loss": 0.44566264748573303, "step": 16257 }, { "epoch": 19.948466257668713, "grad_norm": 0.290048748254776, "learning_rate": 9.315931733866467e-10, "loss": 0.7629603743553162, "step": 16258 }, { "epoch": 19.94969325153374, "grad_norm": 0.2462032437324524, "learning_rate": 8.887673483171099e-10, "loss": 0.3848365545272827, "step": 16259 }, { "epoch": 19.950920245398773, "grad_norm": 0.2963328957557678, "learning_rate": 8.469491661222684e-10, "loss": 0.7475588321685791, "step": 16260 }, { "epoch": 19.952147239263805, "grad_norm": 0.27099505066871643, "learning_rate": 8.06138628489661e-10, "loss": 0.49332553148269653, "step": 16261 }, { "epoch": 19.953374233128834, "grad_norm": 0.25060924887657166, "learning_rate": 7.663357370596425e-10, "loss": 0.5212326049804688, "step": 16262 }, { "epoch": 19.954601226993866, "grad_norm": 0.2802629768848419, "learning_rate": 7.275404934364849e-10, "loss": 0.6265736222267151, "step": 16263 }, { "epoch": 19.955828220858894, "grad_norm": 0.24711941182613373, "learning_rate": 6.897528991883784e-10, "loss": 0.6305452585220337, "step": 16264 }, { "epoch": 19.957055214723926, "grad_norm": 0.2828010320663452, "learning_rate": 6.529729558363284e-10, "loss": 0.5041950941085815, "step": 16265 }, { "epoch": 19.958282208588958, "grad_norm": 0.26520174741744995, "learning_rate": 6.172006648624828e-10, "loss": 0.7630486488342285, "step": 16266 }, { "epoch": 19.959509202453987, "grad_norm": 0.22848939895629883, "learning_rate": 5.824360277073559e-10, "loss": 0.39715874195098877, "step": 16267 }, { "epoch": 19.96073619631902, "grad_norm": 0.25177448987960815, "learning_rate": 5.486790457753799e-10, "loss": 0.4023863971233368, "step": 16268 }, { "epoch": 19.96196319018405, "grad_norm": 0.29539355635643005, "learning_rate": 5.159297204238023e-10, "loss": 0.5298452377319336, "step": 16269 }, { "epoch": 19.96319018404908, "grad_norm": 0.2694055438041687, "learning_rate": 4.841880529765641e-10, "loss": 0.45743635296821594, "step": 16270 }, { "epoch": 19.96441717791411, "grad_norm": 0.25338271260261536, "learning_rate": 4.534540447076463e-10, "loss": 0.5844091176986694, "step": 16271 }, { "epoch": 19.96564417177914, "grad_norm": 0.2750682234764099, "learning_rate": 4.2372769686049863e-10, "loss": 0.4653807282447815, "step": 16272 }, { "epoch": 19.96687116564417, "grad_norm": 0.23543284833431244, "learning_rate": 3.9500901063416194e-10, "loss": 0.5624647736549377, "step": 16273 }, { "epoch": 19.968098159509204, "grad_norm": 0.28273266553878784, "learning_rate": 3.672979871804927e-10, "loss": 0.6415307521820068, "step": 16274 }, { "epoch": 19.969325153374232, "grad_norm": 0.26589664816856384, "learning_rate": 3.4059462762359164e-10, "loss": 0.629359781742096, "step": 16275 }, { "epoch": 19.970552147239264, "grad_norm": 0.30133673548698425, "learning_rate": 3.1489893303204843e-10, "loss": 0.6743714213371277, "step": 16276 }, { "epoch": 19.971779141104296, "grad_norm": 0.24149096012115479, "learning_rate": 2.902109044466972e-10, "loss": 0.5655030012130737, "step": 16277 }, { "epoch": 19.973006134969324, "grad_norm": 0.2459070384502411, "learning_rate": 2.665305428639631e-10, "loss": 0.5063390731811523, "step": 16278 }, { "epoch": 19.974233128834356, "grad_norm": 0.27514341473579407, "learning_rate": 2.4385784923308674e-10, "loss": 0.6695590615272522, "step": 16279 }, { "epoch": 19.975460122699385, "grad_norm": 0.29874908924102783, "learning_rate": 2.2219282447277778e-10, "loss": 0.34940165281295776, "step": 16280 }, { "epoch": 19.976687116564417, "grad_norm": 0.27747642993927, "learning_rate": 2.015354694517857e-10, "loss": 0.6391558647155762, "step": 16281 }, { "epoch": 19.97791411042945, "grad_norm": 0.2599896788597107, "learning_rate": 1.8188578500832888e-10, "loss": 0.5198867321014404, "step": 16282 }, { "epoch": 19.979141104294477, "grad_norm": 0.291477769613266, "learning_rate": 1.6324377193066565e-10, "loss": 0.6670821905136108, "step": 16283 }, { "epoch": 19.98036809815951, "grad_norm": 0.2764919102191925, "learning_rate": 1.4560943096819658e-10, "loss": 0.5779229402542114, "step": 16284 }, { "epoch": 19.98159509202454, "grad_norm": 0.2672038972377777, "learning_rate": 1.289827628370155e-10, "loss": 0.5859079957008362, "step": 16285 }, { "epoch": 19.98282208588957, "grad_norm": 0.25981295108795166, "learning_rate": 1.1336376820325623e-10, "loss": 0.6229197978973389, "step": 16286 }, { "epoch": 19.9840490797546, "grad_norm": 0.2849527895450592, "learning_rate": 9.87524476997459e-11, "loss": 0.7990140914916992, "step": 16287 }, { "epoch": 19.98527607361963, "grad_norm": 0.27847468852996826, "learning_rate": 8.514880191212715e-11, "loss": 0.5412949323654175, "step": 16288 }, { "epoch": 19.986503067484662, "grad_norm": 0.23352737724781036, "learning_rate": 7.255283138996039e-11, "loss": 0.3678314983844757, "step": 16289 }, { "epoch": 19.987730061349694, "grad_norm": 0.2763413190841675, "learning_rate": 6.096453664117263e-11, "loss": 0.6856487989425659, "step": 16290 }, { "epoch": 19.988957055214723, "grad_norm": 0.25349316000938416, "learning_rate": 5.038391813483312e-11, "loss": 0.5396376848220825, "step": 16291 }, { "epoch": 19.990184049079755, "grad_norm": 0.25586146116256714, "learning_rate": 4.0810976292826597e-11, "loss": 0.6023741960525513, "step": 16292 }, { "epoch": 19.991411042944787, "grad_norm": 0.8770232200622559, "learning_rate": 3.224571150373112e-11, "loss": 0.4764806628227234, "step": 16293 }, { "epoch": 19.992638036809815, "grad_norm": 0.3072080910205841, "learning_rate": 2.4688124114491397e-11, "loss": 0.6098132133483887, "step": 16294 }, { "epoch": 19.993865030674847, "grad_norm": 0.2564147114753723, "learning_rate": 1.8138214427643185e-11, "loss": 0.5958019495010376, "step": 16295 }, { "epoch": 19.99509202453988, "grad_norm": 0.25169652700424194, "learning_rate": 1.2595982706864461e-11, "loss": 0.42747119069099426, "step": 16296 }, { "epoch": 19.996319018404908, "grad_norm": 0.25083857774734497, "learning_rate": 8.061429176975388e-12, "loss": 0.5245164632797241, "step": 16297 }, { "epoch": 19.99754601226994, "grad_norm": 0.24573828279972076, "learning_rate": 4.5345540183872046e-12, "loss": 0.4817044734954834, "step": 16298 }, { "epoch": 19.998773006134968, "grad_norm": 0.2476826310157776, "learning_rate": 2.0153573754289058e-12, "loss": 0.5343716144561768, "step": 16299 }, { "epoch": 20.0, "grad_norm": 0.27893704175949097, "learning_rate": 5.038393480205628e-13, "loss": 0.3585406243801117, "step": 16300 } ], "logging_steps": 1, "max_steps": 16300, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.559192932598415e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }