{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999295526593871, "eval_steps": 500, "global_step": 7097, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00014089468122578373, "grad_norm": 4.912382355975032, "learning_rate": 9.389671361502348e-08, "loss": 1.3625, "step": 1 }, { "epoch": 0.00028178936245156747, "grad_norm": 5.1076370485671445, "learning_rate": 1.8779342723004696e-07, "loss": 1.4097, "step": 2 }, { "epoch": 0.00042268404367735117, "grad_norm": 6.0668098104520904, "learning_rate": 2.8169014084507043e-07, "loss": 1.3878, "step": 3 }, { "epoch": 0.0005635787249031349, "grad_norm": 5.7297602209979255, "learning_rate": 3.755868544600939e-07, "loss": 1.4633, "step": 4 }, { "epoch": 0.0007044734061289186, "grad_norm": 4.996232424300514, "learning_rate": 4.694835680751174e-07, "loss": 1.3706, "step": 5 }, { "epoch": 0.0008453680873547023, "grad_norm": 6.9603790383639295, "learning_rate": 5.633802816901409e-07, "loss": 0.9858, "step": 6 }, { "epoch": 0.000986262768580486, "grad_norm": 5.627170787819283, "learning_rate": 6.572769953051644e-07, "loss": 1.3182, "step": 7 }, { "epoch": 0.0011271574498062699, "grad_norm": 4.043414220434429, "learning_rate": 7.511737089201878e-07, "loss": 0.7238, "step": 8 }, { "epoch": 0.0012680521310320535, "grad_norm": 5.356919392133117, "learning_rate": 8.450704225352114e-07, "loss": 1.3549, "step": 9 }, { "epoch": 0.0014089468122578373, "grad_norm": 4.966123964279566, "learning_rate": 9.389671361502348e-07, "loss": 0.8307, "step": 10 }, { "epoch": 0.001549841493483621, "grad_norm": 4.399430822833066, "learning_rate": 1.0328638497652584e-06, "loss": 1.3321, "step": 11 }, { "epoch": 0.0016907361747094047, "grad_norm": 4.552862641557298, "learning_rate": 1.1267605633802817e-06, "loss": 1.3098, "step": 12 }, { "epoch": 0.0018316308559351885, "grad_norm": 4.231419733631434, "learning_rate": 1.2206572769953053e-06, "loss": 1.3317, "step": 13 }, { "epoch": 0.001972525537160972, "grad_norm": 3.96070501186573, "learning_rate": 1.3145539906103288e-06, "loss": 1.3862, "step": 14 }, { "epoch": 0.0021134202183867557, "grad_norm": 5.0590474893331, "learning_rate": 1.4084507042253523e-06, "loss": 0.837, "step": 15 }, { "epoch": 0.0022543148996125397, "grad_norm": 3.426967097410558, "learning_rate": 1.5023474178403756e-06, "loss": 1.3234, "step": 16 }, { "epoch": 0.0023952095808383233, "grad_norm": 3.144241397296948, "learning_rate": 1.5962441314553992e-06, "loss": 0.6271, "step": 17 }, { "epoch": 0.002536104262064107, "grad_norm": 2.66308017242415, "learning_rate": 1.6901408450704227e-06, "loss": 1.211, "step": 18 }, { "epoch": 0.002676998943289891, "grad_norm": 6.594789649054197, "learning_rate": 1.784037558685446e-06, "loss": 1.1131, "step": 19 }, { "epoch": 0.0028178936245156746, "grad_norm": 2.6542647775445887, "learning_rate": 1.8779342723004696e-06, "loss": 1.2876, "step": 20 }, { "epoch": 0.002958788305741458, "grad_norm": 2.1571119794271962, "learning_rate": 1.971830985915493e-06, "loss": 0.5749, "step": 21 }, { "epoch": 0.003099682986967242, "grad_norm": 2.2093529685511335, "learning_rate": 2.065727699530517e-06, "loss": 1.1875, "step": 22 }, { "epoch": 0.0032405776681930258, "grad_norm": 1.4808592209251308, "learning_rate": 2.15962441314554e-06, "loss": 0.4753, "step": 23 }, { "epoch": 0.0033814723494188094, "grad_norm": 2.5223784023935663, "learning_rate": 2.2535211267605635e-06, "loss": 1.2522, "step": 24 }, { "epoch": 0.003522367030644593, "grad_norm": 2.378238204480773, "learning_rate": 2.347417840375587e-06, "loss": 1.2698, "step": 25 }, { "epoch": 0.003663261711870377, "grad_norm": 2.15799444866533, "learning_rate": 2.4413145539906105e-06, "loss": 1.2036, "step": 26 }, { "epoch": 0.0038041563930961606, "grad_norm": 2.3075265647504644, "learning_rate": 2.535211267605634e-06, "loss": 1.189, "step": 27 }, { "epoch": 0.003945051074321944, "grad_norm": 2.0065013197673593, "learning_rate": 2.6291079812206576e-06, "loss": 1.186, "step": 28 }, { "epoch": 0.004085945755547728, "grad_norm": 1.7860191682225164, "learning_rate": 2.723004694835681e-06, "loss": 0.5955, "step": 29 }, { "epoch": 0.004226840436773511, "grad_norm": 1.8941397225945553, "learning_rate": 2.8169014084507046e-06, "loss": 1.1891, "step": 30 }, { "epoch": 0.004367735117999295, "grad_norm": 1.967418917414035, "learning_rate": 2.910798122065728e-06, "loss": 1.1894, "step": 31 }, { "epoch": 0.0045086297992250795, "grad_norm": 1.851105215596139, "learning_rate": 3.0046948356807513e-06, "loss": 1.1191, "step": 32 }, { "epoch": 0.004649524480450863, "grad_norm": 1.7986511030807322, "learning_rate": 3.0985915492957746e-06, "loss": 1.1554, "step": 33 }, { "epoch": 0.004790419161676647, "grad_norm": 1.9614038672768894, "learning_rate": 3.1924882629107983e-06, "loss": 1.159, "step": 34 }, { "epoch": 0.004931313842902431, "grad_norm": 1.7124867855000574, "learning_rate": 3.286384976525822e-06, "loss": 1.0899, "step": 35 }, { "epoch": 0.005072208524128214, "grad_norm": 2.6916322484446007, "learning_rate": 3.3802816901408454e-06, "loss": 0.8018, "step": 36 }, { "epoch": 0.005213103205353998, "grad_norm": 1.6436275336897188, "learning_rate": 3.474178403755869e-06, "loss": 1.069, "step": 37 }, { "epoch": 0.005353997886579782, "grad_norm": 1.6420531815503943, "learning_rate": 3.568075117370892e-06, "loss": 1.1291, "step": 38 }, { "epoch": 0.005494892567805565, "grad_norm": 1.9102584532067746, "learning_rate": 3.6619718309859158e-06, "loss": 1.2313, "step": 39 }, { "epoch": 0.005635787249031349, "grad_norm": 1.835802739315411, "learning_rate": 3.755868544600939e-06, "loss": 0.515, "step": 40 }, { "epoch": 0.005776681930257133, "grad_norm": 1.4644002420346023, "learning_rate": 3.849765258215963e-06, "loss": 1.0869, "step": 41 }, { "epoch": 0.005917576611482916, "grad_norm": 1.5725078020955943, "learning_rate": 3.943661971830986e-06, "loss": 1.0402, "step": 42 }, { "epoch": 0.0060584712927087, "grad_norm": 1.4427711212567738, "learning_rate": 4.0375586854460095e-06, "loss": 1.0269, "step": 43 }, { "epoch": 0.006199365973934484, "grad_norm": 1.7230076404081383, "learning_rate": 4.131455399061034e-06, "loss": 0.592, "step": 44 }, { "epoch": 0.0063402606551602675, "grad_norm": 1.3928771401385542, "learning_rate": 4.225352112676057e-06, "loss": 1.1203, "step": 45 }, { "epoch": 0.0064811553363860516, "grad_norm": 1.5079025934965808, "learning_rate": 4.31924882629108e-06, "loss": 0.5587, "step": 46 }, { "epoch": 0.006622050017611836, "grad_norm": 1.4486120668331453, "learning_rate": 4.413145539906104e-06, "loss": 1.0722, "step": 47 }, { "epoch": 0.006762944698837619, "grad_norm": 1.435541824792553, "learning_rate": 4.507042253521127e-06, "loss": 1.0127, "step": 48 }, { "epoch": 0.006903839380063403, "grad_norm": 1.3925525103174934, "learning_rate": 4.60093896713615e-06, "loss": 1.0364, "step": 49 }, { "epoch": 0.007044734061289186, "grad_norm": 1.2526192388339632, "learning_rate": 4.694835680751174e-06, "loss": 1.0491, "step": 50 }, { "epoch": 0.00718562874251497, "grad_norm": 1.6935718043171153, "learning_rate": 4.788732394366197e-06, "loss": 0.5191, "step": 51 }, { "epoch": 0.007326523423740754, "grad_norm": 1.3764605867980737, "learning_rate": 4.882629107981221e-06, "loss": 0.4575, "step": 52 }, { "epoch": 0.007467418104966537, "grad_norm": 1.5591103219270126, "learning_rate": 4.976525821596244e-06, "loss": 1.0687, "step": 53 }, { "epoch": 0.007608312786192321, "grad_norm": 1.3041153533098977, "learning_rate": 5.070422535211268e-06, "loss": 0.5395, "step": 54 }, { "epoch": 0.007749207467418105, "grad_norm": 1.3158821474558573, "learning_rate": 5.164319248826292e-06, "loss": 1.0689, "step": 55 }, { "epoch": 0.007890102148643888, "grad_norm": 1.4323291944334033, "learning_rate": 5.258215962441315e-06, "loss": 1.0899, "step": 56 }, { "epoch": 0.008030996829869672, "grad_norm": 1.4545819038929386, "learning_rate": 5.352112676056338e-06, "loss": 1.109, "step": 57 }, { "epoch": 0.008171891511095456, "grad_norm": 1.1969370949734117, "learning_rate": 5.446009389671362e-06, "loss": 0.4708, "step": 58 }, { "epoch": 0.00831278619232124, "grad_norm": 1.4517593597164027, "learning_rate": 5.539906103286385e-06, "loss": 0.52, "step": 59 }, { "epoch": 0.008453680873547023, "grad_norm": 1.381894996111473, "learning_rate": 5.633802816901409e-06, "loss": 1.0567, "step": 60 }, { "epoch": 0.008594575554772808, "grad_norm": 1.4297623587449593, "learning_rate": 5.727699530516433e-06, "loss": 1.0965, "step": 61 }, { "epoch": 0.00873547023599859, "grad_norm": 1.3262101735226064, "learning_rate": 5.821596244131456e-06, "loss": 1.0585, "step": 62 }, { "epoch": 0.008876364917224374, "grad_norm": 1.3460787118766857, "learning_rate": 5.915492957746479e-06, "loss": 0.446, "step": 63 }, { "epoch": 0.009017259598450159, "grad_norm": 1.5584564928710005, "learning_rate": 6.0093896713615026e-06, "loss": 1.0831, "step": 64 }, { "epoch": 0.009158154279675942, "grad_norm": 1.3395512271925032, "learning_rate": 6.103286384976527e-06, "loss": 1.0409, "step": 65 }, { "epoch": 0.009299048960901725, "grad_norm": 1.5450083948197413, "learning_rate": 6.197183098591549e-06, "loss": 1.0671, "step": 66 }, { "epoch": 0.00943994364212751, "grad_norm": 1.6097834144189775, "learning_rate": 6.291079812206573e-06, "loss": 1.0678, "step": 67 }, { "epoch": 0.009580838323353293, "grad_norm": 1.3411078764474547, "learning_rate": 6.384976525821597e-06, "loss": 0.5142, "step": 68 }, { "epoch": 0.009721733004579076, "grad_norm": 1.400890838376178, "learning_rate": 6.478873239436621e-06, "loss": 0.4638, "step": 69 }, { "epoch": 0.009862627685804861, "grad_norm": 1.2646886571475142, "learning_rate": 6.572769953051644e-06, "loss": 1.0341, "step": 70 }, { "epoch": 0.010003522367030645, "grad_norm": 1.3875004792436316, "learning_rate": 6.666666666666667e-06, "loss": 0.9972, "step": 71 }, { "epoch": 0.010144417048256428, "grad_norm": 1.5385848046730264, "learning_rate": 6.760563380281691e-06, "loss": 0.5711, "step": 72 }, { "epoch": 0.010285311729482213, "grad_norm": 1.5404191881363751, "learning_rate": 6.854460093896714e-06, "loss": 0.9985, "step": 73 }, { "epoch": 0.010426206410707996, "grad_norm": 1.403164822205354, "learning_rate": 6.948356807511738e-06, "loss": 1.028, "step": 74 }, { "epoch": 0.010567101091933779, "grad_norm": 1.2996594585817258, "learning_rate": 7.042253521126761e-06, "loss": 1.0465, "step": 75 }, { "epoch": 0.010707995773159564, "grad_norm": 1.2277099823544502, "learning_rate": 7.136150234741784e-06, "loss": 0.9748, "step": 76 }, { "epoch": 0.010848890454385347, "grad_norm": 1.307361300709001, "learning_rate": 7.230046948356808e-06, "loss": 1.0162, "step": 77 }, { "epoch": 0.01098978513561113, "grad_norm": 1.423076538836319, "learning_rate": 7.3239436619718316e-06, "loss": 1.0147, "step": 78 }, { "epoch": 0.011130679816836915, "grad_norm": 1.3807619476388937, "learning_rate": 7.417840375586856e-06, "loss": 1.0653, "step": 79 }, { "epoch": 0.011271574498062698, "grad_norm": 1.2143466146695236, "learning_rate": 7.511737089201878e-06, "loss": 1.0059, "step": 80 }, { "epoch": 0.011412469179288481, "grad_norm": 1.257243586068606, "learning_rate": 7.6056338028169015e-06, "loss": 0.4371, "step": 81 }, { "epoch": 0.011553363860514266, "grad_norm": 1.2805353439528049, "learning_rate": 7.699530516431926e-06, "loss": 0.46, "step": 82 }, { "epoch": 0.01169425854174005, "grad_norm": 1.8381224256008328, "learning_rate": 7.79342723004695e-06, "loss": 0.6751, "step": 83 }, { "epoch": 0.011835153222965833, "grad_norm": 1.4351407122219841, "learning_rate": 7.887323943661972e-06, "loss": 0.9821, "step": 84 }, { "epoch": 0.011976047904191617, "grad_norm": 1.3048711752997357, "learning_rate": 7.981220657276996e-06, "loss": 0.4087, "step": 85 }, { "epoch": 0.0121169425854174, "grad_norm": 1.385634057929717, "learning_rate": 8.075117370892019e-06, "loss": 1.0441, "step": 86 }, { "epoch": 0.012257837266643184, "grad_norm": 1.259705455039276, "learning_rate": 8.169014084507043e-06, "loss": 1.0525, "step": 87 }, { "epoch": 0.012398731947868969, "grad_norm": 1.3855885000124553, "learning_rate": 8.262910798122067e-06, "loss": 1.0483, "step": 88 }, { "epoch": 0.012539626629094752, "grad_norm": 1.2667851403075816, "learning_rate": 8.35680751173709e-06, "loss": 0.4812, "step": 89 }, { "epoch": 0.012680521310320535, "grad_norm": 1.8150419167584386, "learning_rate": 8.450704225352114e-06, "loss": 0.7168, "step": 90 }, { "epoch": 0.01282141599154632, "grad_norm": 1.4114774037165543, "learning_rate": 8.544600938967136e-06, "loss": 0.4998, "step": 91 }, { "epoch": 0.012962310672772103, "grad_norm": 1.3706965191651659, "learning_rate": 8.63849765258216e-06, "loss": 1.0318, "step": 92 }, { "epoch": 0.013103205353997886, "grad_norm": 1.4542419726021274, "learning_rate": 8.732394366197183e-06, "loss": 1.0274, "step": 93 }, { "epoch": 0.013244100035223671, "grad_norm": 1.2843044619090664, "learning_rate": 8.826291079812207e-06, "loss": 0.9825, "step": 94 }, { "epoch": 0.013384994716449454, "grad_norm": 1.3043248228902007, "learning_rate": 8.920187793427231e-06, "loss": 1.0436, "step": 95 }, { "epoch": 0.013525889397675237, "grad_norm": 1.203874618847127, "learning_rate": 9.014084507042254e-06, "loss": 0.9936, "step": 96 }, { "epoch": 0.01366678407890102, "grad_norm": 1.385749138487344, "learning_rate": 9.107981220657278e-06, "loss": 1.0779, "step": 97 }, { "epoch": 0.013807678760126806, "grad_norm": 1.547384136880833, "learning_rate": 9.2018779342723e-06, "loss": 0.5616, "step": 98 }, { "epoch": 0.013948573441352589, "grad_norm": 1.4234053337046702, "learning_rate": 9.295774647887325e-06, "loss": 1.0653, "step": 99 }, { "epoch": 0.014089468122578372, "grad_norm": 1.6783610035489407, "learning_rate": 9.389671361502349e-06, "loss": 0.5605, "step": 100 }, { "epoch": 0.014230362803804157, "grad_norm": 1.612812141427981, "learning_rate": 9.483568075117371e-06, "loss": 0.6016, "step": 101 }, { "epoch": 0.01437125748502994, "grad_norm": 1.2354114742001012, "learning_rate": 9.577464788732394e-06, "loss": 0.9926, "step": 102 }, { "epoch": 0.014512152166255723, "grad_norm": 1.43171791042936, "learning_rate": 9.671361502347418e-06, "loss": 1.0602, "step": 103 }, { "epoch": 0.014653046847481508, "grad_norm": 1.462753082128531, "learning_rate": 9.765258215962442e-06, "loss": 0.5294, "step": 104 }, { "epoch": 0.014793941528707291, "grad_norm": 1.3728612797698965, "learning_rate": 9.859154929577466e-06, "loss": 0.9928, "step": 105 }, { "epoch": 0.014934836209933074, "grad_norm": 1.3347574616622282, "learning_rate": 9.953051643192489e-06, "loss": 1.0553, "step": 106 }, { "epoch": 0.01507573089115886, "grad_norm": 1.3412583771652684, "learning_rate": 1.0046948356807513e-05, "loss": 1.0286, "step": 107 }, { "epoch": 0.015216625572384642, "grad_norm": 1.5006507132779188, "learning_rate": 1.0140845070422535e-05, "loss": 1.0402, "step": 108 }, { "epoch": 0.015357520253610426, "grad_norm": 1.3799983416982622, "learning_rate": 1.0234741784037558e-05, "loss": 1.0569, "step": 109 }, { "epoch": 0.01549841493483621, "grad_norm": 1.3925101403099185, "learning_rate": 1.0328638497652584e-05, "loss": 1.0097, "step": 110 }, { "epoch": 0.015639309616061994, "grad_norm": 1.481466955057947, "learning_rate": 1.0422535211267606e-05, "loss": 0.5279, "step": 111 }, { "epoch": 0.015780204297287777, "grad_norm": 1.3297810894287858, "learning_rate": 1.051643192488263e-05, "loss": 1.0019, "step": 112 }, { "epoch": 0.01592109897851356, "grad_norm": 1.5706416771284923, "learning_rate": 1.0610328638497653e-05, "loss": 0.4956, "step": 113 }, { "epoch": 0.016061993659739343, "grad_norm": 1.4122485186273859, "learning_rate": 1.0704225352112675e-05, "loss": 1.0314, "step": 114 }, { "epoch": 0.01620288834096513, "grad_norm": 1.3834158723347119, "learning_rate": 1.0798122065727701e-05, "loss": 0.9701, "step": 115 }, { "epoch": 0.016343783022190913, "grad_norm": 1.219692461447811, "learning_rate": 1.0892018779342724e-05, "loss": 0.9513, "step": 116 }, { "epoch": 0.016484677703416696, "grad_norm": 1.2456335631267967, "learning_rate": 1.0985915492957748e-05, "loss": 0.9782, "step": 117 }, { "epoch": 0.01662557238464248, "grad_norm": 1.6425607571929683, "learning_rate": 1.107981220657277e-05, "loss": 1.114, "step": 118 }, { "epoch": 0.016766467065868262, "grad_norm": 1.330554655837119, "learning_rate": 1.1173708920187793e-05, "loss": 0.4491, "step": 119 }, { "epoch": 0.016907361747094046, "grad_norm": 1.242684976280859, "learning_rate": 1.1267605633802819e-05, "loss": 1.0222, "step": 120 }, { "epoch": 0.017048256428319832, "grad_norm": 1.4304883905891659, "learning_rate": 1.1361502347417841e-05, "loss": 1.0544, "step": 121 }, { "epoch": 0.017189151109545615, "grad_norm": 1.4904086287290956, "learning_rate": 1.1455399061032865e-05, "loss": 0.5527, "step": 122 }, { "epoch": 0.0173300457907714, "grad_norm": 1.3349062470369664, "learning_rate": 1.1549295774647888e-05, "loss": 0.4443, "step": 123 }, { "epoch": 0.01747094047199718, "grad_norm": 1.254760909440931, "learning_rate": 1.1643192488262912e-05, "loss": 0.4715, "step": 124 }, { "epoch": 0.017611835153222965, "grad_norm": 1.1935191466224675, "learning_rate": 1.1737089201877936e-05, "loss": 0.427, "step": 125 }, { "epoch": 0.017752729834448748, "grad_norm": 1.297774910831504, "learning_rate": 1.1830985915492958e-05, "loss": 0.9972, "step": 126 }, { "epoch": 0.017893624515674535, "grad_norm": 1.4376196745696, "learning_rate": 1.1924882629107981e-05, "loss": 1.0658, "step": 127 }, { "epoch": 0.018034519196900318, "grad_norm": 1.3560721118970536, "learning_rate": 1.2018779342723005e-05, "loss": 1.0254, "step": 128 }, { "epoch": 0.0181754138781261, "grad_norm": 1.593127989517672, "learning_rate": 1.211267605633803e-05, "loss": 1.0583, "step": 129 }, { "epoch": 0.018316308559351884, "grad_norm": 1.4078766023752303, "learning_rate": 1.2206572769953053e-05, "loss": 0.513, "step": 130 }, { "epoch": 0.018457203240577667, "grad_norm": 1.2234034333145465, "learning_rate": 1.2300469483568076e-05, "loss": 0.4156, "step": 131 }, { "epoch": 0.01859809792180345, "grad_norm": 1.2520524160538453, "learning_rate": 1.2394366197183098e-05, "loss": 0.4966, "step": 132 }, { "epoch": 0.018738992603029237, "grad_norm": 1.5372089389761887, "learning_rate": 1.2488262910798124e-05, "loss": 0.9904, "step": 133 }, { "epoch": 0.01887988728425502, "grad_norm": 1.7550227185528033, "learning_rate": 1.2582159624413147e-05, "loss": 0.5945, "step": 134 }, { "epoch": 0.019020781965480803, "grad_norm": 1.3326412950668314, "learning_rate": 1.2676056338028171e-05, "loss": 1.0484, "step": 135 }, { "epoch": 0.019161676646706587, "grad_norm": 1.3918766817128914, "learning_rate": 1.2769953051643193e-05, "loss": 1.0053, "step": 136 }, { "epoch": 0.01930257132793237, "grad_norm": 1.3678904835370194, "learning_rate": 1.2863849765258216e-05, "loss": 0.9955, "step": 137 }, { "epoch": 0.019443466009158153, "grad_norm": 1.539907636331692, "learning_rate": 1.2957746478873242e-05, "loss": 1.0539, "step": 138 }, { "epoch": 0.01958436069038394, "grad_norm": 1.4040310850461983, "learning_rate": 1.3051643192488264e-05, "loss": 1.0979, "step": 139 }, { "epoch": 0.019725255371609723, "grad_norm": 1.2274727989749585, "learning_rate": 1.3145539906103288e-05, "loss": 0.9522, "step": 140 }, { "epoch": 0.019866150052835506, "grad_norm": 1.3792965553091132, "learning_rate": 1.323943661971831e-05, "loss": 1.0504, "step": 141 }, { "epoch": 0.02000704473406129, "grad_norm": 1.4460801437624733, "learning_rate": 1.3333333333333333e-05, "loss": 1.0371, "step": 142 }, { "epoch": 0.020147939415287072, "grad_norm": 1.1771040892084235, "learning_rate": 1.342723004694836e-05, "loss": 0.9766, "step": 143 }, { "epoch": 0.020288834096512855, "grad_norm": 1.298298187321549, "learning_rate": 1.3521126760563382e-05, "loss": 1.027, "step": 144 }, { "epoch": 0.020429728777738642, "grad_norm": 1.2668532285604654, "learning_rate": 1.3615023474178406e-05, "loss": 1.0148, "step": 145 }, { "epoch": 0.020570623458964425, "grad_norm": 1.440645566776818, "learning_rate": 1.3708920187793428e-05, "loss": 0.9781, "step": 146 }, { "epoch": 0.02071151814019021, "grad_norm": 1.19431846591404, "learning_rate": 1.380281690140845e-05, "loss": 0.4122, "step": 147 }, { "epoch": 0.02085241282141599, "grad_norm": 1.7842668283001197, "learning_rate": 1.3896713615023477e-05, "loss": 0.5798, "step": 148 }, { "epoch": 0.020993307502641775, "grad_norm": 1.419854221274279, "learning_rate": 1.3990610328638499e-05, "loss": 0.9999, "step": 149 }, { "epoch": 0.021134202183867558, "grad_norm": 1.3469462599060764, "learning_rate": 1.4084507042253522e-05, "loss": 1.0373, "step": 150 }, { "epoch": 0.02127509686509334, "grad_norm": 1.6643685553721488, "learning_rate": 1.4178403755868546e-05, "loss": 0.5292, "step": 151 }, { "epoch": 0.021415991546319128, "grad_norm": 1.5296216334030945, "learning_rate": 1.4272300469483568e-05, "loss": 0.9823, "step": 152 }, { "epoch": 0.02155688622754491, "grad_norm": 1.4072164535448213, "learning_rate": 1.4366197183098594e-05, "loss": 1.0505, "step": 153 }, { "epoch": 0.021697780908770694, "grad_norm": 1.2450380455615306, "learning_rate": 1.4460093896713617e-05, "loss": 0.9783, "step": 154 }, { "epoch": 0.021838675589996477, "grad_norm": 1.105439423758011, "learning_rate": 1.4553990610328639e-05, "loss": 0.9383, "step": 155 }, { "epoch": 0.02197957027122226, "grad_norm": 1.174475892533617, "learning_rate": 1.4647887323943663e-05, "loss": 0.9521, "step": 156 }, { "epoch": 0.022120464952448043, "grad_norm": 1.2779831105612387, "learning_rate": 1.4741784037558686e-05, "loss": 0.4446, "step": 157 }, { "epoch": 0.02226135963367383, "grad_norm": 1.3913098551395142, "learning_rate": 1.4835680751173711e-05, "loss": 0.9949, "step": 158 }, { "epoch": 0.022402254314899613, "grad_norm": 1.6924019114473943, "learning_rate": 1.4929577464788734e-05, "loss": 0.5806, "step": 159 }, { "epoch": 0.022543148996125396, "grad_norm": 1.3287618779050616, "learning_rate": 1.5023474178403756e-05, "loss": 0.9544, "step": 160 }, { "epoch": 0.02268404367735118, "grad_norm": 1.269855643534599, "learning_rate": 1.511737089201878e-05, "loss": 0.9743, "step": 161 }, { "epoch": 0.022824938358576963, "grad_norm": 1.689141056808817, "learning_rate": 1.5211267605633803e-05, "loss": 0.9745, "step": 162 }, { "epoch": 0.022965833039802746, "grad_norm": 1.100118483803762, "learning_rate": 1.5305164319248827e-05, "loss": 0.4638, "step": 163 }, { "epoch": 0.023106727721028533, "grad_norm": 1.139729752525637, "learning_rate": 1.539906103286385e-05, "loss": 0.4046, "step": 164 }, { "epoch": 0.023247622402254316, "grad_norm": 1.4726837478746675, "learning_rate": 1.5492957746478872e-05, "loss": 0.9902, "step": 165 }, { "epoch": 0.0233885170834801, "grad_norm": 1.2783913623400107, "learning_rate": 1.55868544600939e-05, "loss": 0.5033, "step": 166 }, { "epoch": 0.023529411764705882, "grad_norm": 1.1514228497302403, "learning_rate": 1.568075117370892e-05, "loss": 0.4837, "step": 167 }, { "epoch": 0.023670306445931665, "grad_norm": 1.2477662695719043, "learning_rate": 1.5774647887323945e-05, "loss": 0.4124, "step": 168 }, { "epoch": 0.02381120112715745, "grad_norm": 1.5105909816618066, "learning_rate": 1.586854460093897e-05, "loss": 1.0172, "step": 169 }, { "epoch": 0.023952095808383235, "grad_norm": 1.505989576062187, "learning_rate": 1.5962441314553993e-05, "loss": 0.5178, "step": 170 }, { "epoch": 0.024092990489609018, "grad_norm": 1.3651895876670803, "learning_rate": 1.6056338028169017e-05, "loss": 0.9866, "step": 171 }, { "epoch": 0.0242338851708348, "grad_norm": 1.3175607183196307, "learning_rate": 1.6150234741784038e-05, "loss": 0.4286, "step": 172 }, { "epoch": 0.024374779852060584, "grad_norm": 1.391138353537431, "learning_rate": 1.6244131455399062e-05, "loss": 1.0322, "step": 173 }, { "epoch": 0.024515674533286368, "grad_norm": 1.314763388769862, "learning_rate": 1.6338028169014086e-05, "loss": 0.9712, "step": 174 }, { "epoch": 0.02465656921451215, "grad_norm": 1.6189192866852713, "learning_rate": 1.643192488262911e-05, "loss": 0.5643, "step": 175 }, { "epoch": 0.024797463895737937, "grad_norm": 1.4897058758128419, "learning_rate": 1.6525821596244135e-05, "loss": 1.0058, "step": 176 }, { "epoch": 0.02493835857696372, "grad_norm": 1.1771386489875022, "learning_rate": 1.6619718309859155e-05, "loss": 1.0167, "step": 177 }, { "epoch": 0.025079253258189504, "grad_norm": 1.4029981522436072, "learning_rate": 1.671361502347418e-05, "loss": 1.0565, "step": 178 }, { "epoch": 0.025220147939415287, "grad_norm": 1.2934966438882967, "learning_rate": 1.6807511737089204e-05, "loss": 1.044, "step": 179 }, { "epoch": 0.02536104262064107, "grad_norm": 1.640855688860508, "learning_rate": 1.6901408450704228e-05, "loss": 0.6135, "step": 180 }, { "epoch": 0.025501937301866853, "grad_norm": 1.4397560602934982, "learning_rate": 1.6995305164319252e-05, "loss": 1.0329, "step": 181 }, { "epoch": 0.02564283198309264, "grad_norm": 1.2413797400537423, "learning_rate": 1.7089201877934273e-05, "loss": 1.0172, "step": 182 }, { "epoch": 0.025783726664318423, "grad_norm": 1.1544151078732476, "learning_rate": 1.7183098591549297e-05, "loss": 0.9971, "step": 183 }, { "epoch": 0.025924621345544206, "grad_norm": 1.569818451776417, "learning_rate": 1.727699530516432e-05, "loss": 0.4665, "step": 184 }, { "epoch": 0.02606551602676999, "grad_norm": 1.2592133824794178, "learning_rate": 1.7370892018779345e-05, "loss": 0.4702, "step": 185 }, { "epoch": 0.026206410707995773, "grad_norm": 1.2425261337370757, "learning_rate": 1.7464788732394366e-05, "loss": 0.9944, "step": 186 }, { "epoch": 0.026347305389221556, "grad_norm": 1.2477586561106322, "learning_rate": 1.755868544600939e-05, "loss": 1.0049, "step": 187 }, { "epoch": 0.026488200070447342, "grad_norm": 1.4345017702803167, "learning_rate": 1.7652582159624414e-05, "loss": 0.492, "step": 188 }, { "epoch": 0.026629094751673126, "grad_norm": 1.4389480939861155, "learning_rate": 1.774647887323944e-05, "loss": 1.0495, "step": 189 }, { "epoch": 0.02676998943289891, "grad_norm": 1.5391314413663681, "learning_rate": 1.7840375586854463e-05, "loss": 0.6775, "step": 190 }, { "epoch": 0.026910884114124692, "grad_norm": 1.2154746231350448, "learning_rate": 1.7934272300469484e-05, "loss": 0.9778, "step": 191 }, { "epoch": 0.027051778795350475, "grad_norm": 1.2129341396983024, "learning_rate": 1.8028169014084508e-05, "loss": 1.027, "step": 192 }, { "epoch": 0.027192673476576258, "grad_norm": 1.6400904598391948, "learning_rate": 1.8122065727699532e-05, "loss": 0.6496, "step": 193 }, { "epoch": 0.02733356815780204, "grad_norm": 1.211438509924894, "learning_rate": 1.8215962441314556e-05, "loss": 0.4436, "step": 194 }, { "epoch": 0.027474462839027828, "grad_norm": 1.2512072150650626, "learning_rate": 1.830985915492958e-05, "loss": 0.9738, "step": 195 }, { "epoch": 0.02761535752025361, "grad_norm": 1.4033097106919372, "learning_rate": 1.84037558685446e-05, "loss": 0.9901, "step": 196 }, { "epoch": 0.027756252201479394, "grad_norm": 1.3361862480508662, "learning_rate": 1.8497652582159625e-05, "loss": 1.0232, "step": 197 }, { "epoch": 0.027897146882705177, "grad_norm": 1.284099202938977, "learning_rate": 1.859154929577465e-05, "loss": 0.95, "step": 198 }, { "epoch": 0.02803804156393096, "grad_norm": 1.313989063144536, "learning_rate": 1.8685446009389673e-05, "loss": 1.0009, "step": 199 }, { "epoch": 0.028178936245156744, "grad_norm": 1.2988839055982677, "learning_rate": 1.8779342723004698e-05, "loss": 0.4642, "step": 200 }, { "epoch": 0.02831983092638253, "grad_norm": 1.4114728556561915, "learning_rate": 1.887323943661972e-05, "loss": 1.0458, "step": 201 }, { "epoch": 0.028460725607608314, "grad_norm": 1.184709702878851, "learning_rate": 1.8967136150234743e-05, "loss": 0.3848, "step": 202 }, { "epoch": 0.028601620288834097, "grad_norm": 1.544905400193685, "learning_rate": 1.9061032863849767e-05, "loss": 0.7182, "step": 203 }, { "epoch": 0.02874251497005988, "grad_norm": 1.40142877071731, "learning_rate": 1.9154929577464788e-05, "loss": 0.4778, "step": 204 }, { "epoch": 0.028883409651285663, "grad_norm": 1.4471114552081934, "learning_rate": 1.9248826291079815e-05, "loss": 0.5415, "step": 205 }, { "epoch": 0.029024304332511446, "grad_norm": 1.4350642739437818, "learning_rate": 1.9342723004694836e-05, "loss": 0.9719, "step": 206 }, { "epoch": 0.029165199013737233, "grad_norm": 1.293081237636551, "learning_rate": 1.943661971830986e-05, "loss": 1.0324, "step": 207 }, { "epoch": 0.029306093694963016, "grad_norm": 1.6068848704229028, "learning_rate": 1.9530516431924884e-05, "loss": 0.5715, "step": 208 }, { "epoch": 0.0294469883761888, "grad_norm": 1.124731134767785, "learning_rate": 1.962441314553991e-05, "loss": 0.9642, "step": 209 }, { "epoch": 0.029587883057414582, "grad_norm": 1.6275294015567385, "learning_rate": 1.9718309859154933e-05, "loss": 1.0338, "step": 210 }, { "epoch": 0.029728777738640366, "grad_norm": 1.362560112554573, "learning_rate": 1.9812206572769953e-05, "loss": 0.4829, "step": 211 }, { "epoch": 0.02986967241986615, "grad_norm": 1.327076794953473, "learning_rate": 1.9906103286384977e-05, "loss": 0.4526, "step": 212 }, { "epoch": 0.030010567101091935, "grad_norm": 1.2313314663127155, "learning_rate": 2e-05, "loss": 1.0094, "step": 213 }, { "epoch": 0.03015146178231772, "grad_norm": 1.367234024922032, "learning_rate": 1.9999998958670866e-05, "loss": 1.0104, "step": 214 }, { "epoch": 0.0302923564635435, "grad_norm": 1.1283073248812348, "learning_rate": 1.9999995834683686e-05, "loss": 0.9909, "step": 215 }, { "epoch": 0.030433251144769285, "grad_norm": 1.2269518901704988, "learning_rate": 1.9999990628039102e-05, "loss": 1.0622, "step": 216 }, { "epoch": 0.030574145825995068, "grad_norm": 1.5618611305176038, "learning_rate": 1.9999983338738205e-05, "loss": 1.0162, "step": 217 }, { "epoch": 0.03071504050722085, "grad_norm": 1.2468190830772696, "learning_rate": 1.999997396678251e-05, "loss": 0.9937, "step": 218 }, { "epoch": 0.030855935188446638, "grad_norm": 1.0839494449104907, "learning_rate": 1.999996251217397e-05, "loss": 0.9813, "step": 219 }, { "epoch": 0.03099682986967242, "grad_norm": 1.2892068063436368, "learning_rate": 1.9999948974914972e-05, "loss": 1.0262, "step": 220 }, { "epoch": 0.031137724550898204, "grad_norm": 1.287306673704262, "learning_rate": 1.9999933355008335e-05, "loss": 1.0511, "step": 221 }, { "epoch": 0.03127861923212399, "grad_norm": 1.2344021833235177, "learning_rate": 1.9999915652457308e-05, "loss": 1.0133, "step": 222 }, { "epoch": 0.031419513913349774, "grad_norm": 1.7148884254880816, "learning_rate": 1.999989586726558e-05, "loss": 0.5211, "step": 223 }, { "epoch": 0.031560408594575554, "grad_norm": 1.3288718265669326, "learning_rate": 1.9999873999437276e-05, "loss": 0.9776, "step": 224 }, { "epoch": 0.03170130327580134, "grad_norm": 1.2213669492858326, "learning_rate": 1.9999850048976944e-05, "loss": 1.0256, "step": 225 }, { "epoch": 0.03184219795702712, "grad_norm": 1.441189701656003, "learning_rate": 1.9999824015889576e-05, "loss": 0.5604, "step": 226 }, { "epoch": 0.03198309263825291, "grad_norm": 1.4769891631925758, "learning_rate": 1.9999795900180596e-05, "loss": 0.6531, "step": 227 }, { "epoch": 0.032123987319478686, "grad_norm": 1.3487622648738258, "learning_rate": 1.9999765701855853e-05, "loss": 1.0503, "step": 228 }, { "epoch": 0.03226488200070447, "grad_norm": 1.4526238698886127, "learning_rate": 1.999973342092164e-05, "loss": 1.0555, "step": 229 }, { "epoch": 0.03240577668193026, "grad_norm": 1.2508834575852894, "learning_rate": 1.9999699057384678e-05, "loss": 1.0324, "step": 230 }, { "epoch": 0.03254667136315604, "grad_norm": 1.2711592764915143, "learning_rate": 1.9999662611252128e-05, "loss": 1.0178, "step": 231 }, { "epoch": 0.032687566044381826, "grad_norm": 1.2163133989452437, "learning_rate": 1.9999624082531573e-05, "loss": 0.986, "step": 232 }, { "epoch": 0.032828460725607606, "grad_norm": 1.3010462042093835, "learning_rate": 1.9999583471231047e-05, "loss": 1.0366, "step": 233 }, { "epoch": 0.03296935540683339, "grad_norm": 1.2015396246593457, "learning_rate": 1.9999540777359e-05, "loss": 1.0065, "step": 234 }, { "epoch": 0.03311025008805918, "grad_norm": 1.0941383677065286, "learning_rate": 1.999949600092433e-05, "loss": 1.0041, "step": 235 }, { "epoch": 0.03325114476928496, "grad_norm": 2.282608726940363, "learning_rate": 1.9999449141936353e-05, "loss": 0.5207, "step": 236 }, { "epoch": 0.033392039450510745, "grad_norm": 1.148096811618306, "learning_rate": 1.9999400200404838e-05, "loss": 0.9822, "step": 237 }, { "epoch": 0.033532934131736525, "grad_norm": 1.3867244872586457, "learning_rate": 1.9999349176339975e-05, "loss": 0.4977, "step": 238 }, { "epoch": 0.03367382881296231, "grad_norm": 1.138331551888931, "learning_rate": 1.9999296069752386e-05, "loss": 0.9741, "step": 239 }, { "epoch": 0.03381472349418809, "grad_norm": 1.2794447126893154, "learning_rate": 1.9999240880653138e-05, "loss": 0.4645, "step": 240 }, { "epoch": 0.03395561817541388, "grad_norm": 1.3294007061991984, "learning_rate": 1.999918360905372e-05, "loss": 0.4703, "step": 241 }, { "epoch": 0.034096512856639664, "grad_norm": 1.320927945656803, "learning_rate": 1.9999124254966064e-05, "loss": 1.0351, "step": 242 }, { "epoch": 0.034237407537865444, "grad_norm": 1.2455585412366132, "learning_rate": 1.9999062818402528e-05, "loss": 1.0343, "step": 243 }, { "epoch": 0.03437830221909123, "grad_norm": 1.2643181235402732, "learning_rate": 1.9998999299375908e-05, "loss": 0.5013, "step": 244 }, { "epoch": 0.03451919690031701, "grad_norm": 1.340176193760716, "learning_rate": 1.999893369789943e-05, "loss": 1.0547, "step": 245 }, { "epoch": 0.0346600915815428, "grad_norm": 1.1261079492051096, "learning_rate": 1.9998866013986763e-05, "loss": 0.4632, "step": 246 }, { "epoch": 0.034800986262768584, "grad_norm": 1.1366675304377993, "learning_rate": 1.9998796247651998e-05, "loss": 0.963, "step": 247 }, { "epoch": 0.03494188094399436, "grad_norm": 1.201821980727557, "learning_rate": 1.999872439890967e-05, "loss": 0.9988, "step": 248 }, { "epoch": 0.03508277562522015, "grad_norm": 1.6198013859279659, "learning_rate": 1.9998650467774733e-05, "loss": 0.6323, "step": 249 }, { "epoch": 0.03522367030644593, "grad_norm": 1.2998835077102635, "learning_rate": 1.9998574454262595e-05, "loss": 1.0221, "step": 250 }, { "epoch": 0.035364564987671716, "grad_norm": 1.2942469357938575, "learning_rate": 1.999849635838908e-05, "loss": 0.5411, "step": 251 }, { "epoch": 0.035505459668897496, "grad_norm": 1.3824649427017481, "learning_rate": 1.9998416180170456e-05, "loss": 0.9742, "step": 252 }, { "epoch": 0.03564635435012328, "grad_norm": 1.310907175142716, "learning_rate": 1.9998333919623425e-05, "loss": 0.5657, "step": 253 }, { "epoch": 0.03578724903134907, "grad_norm": 1.3535553253949753, "learning_rate": 1.999824957676511e-05, "loss": 1.0124, "step": 254 }, { "epoch": 0.03592814371257485, "grad_norm": 1.2501623184940482, "learning_rate": 1.9998163151613083e-05, "loss": 1.0244, "step": 255 }, { "epoch": 0.036069038393800636, "grad_norm": 1.3091254933880399, "learning_rate": 1.999807464418534e-05, "loss": 1.0038, "step": 256 }, { "epoch": 0.036209933075026415, "grad_norm": 1.181972827559378, "learning_rate": 1.9997984054500323e-05, "loss": 0.3605, "step": 257 }, { "epoch": 0.0363508277562522, "grad_norm": 1.2064395459326775, "learning_rate": 1.9997891382576884e-05, "loss": 1.0015, "step": 258 }, { "epoch": 0.03649172243747799, "grad_norm": 1.2179009390488171, "learning_rate": 1.9997796628434332e-05, "loss": 1.0334, "step": 259 }, { "epoch": 0.03663261711870377, "grad_norm": 1.1801786540655481, "learning_rate": 1.9997699792092405e-05, "loss": 0.9837, "step": 260 }, { "epoch": 0.036773511799929555, "grad_norm": 1.5803050498682207, "learning_rate": 1.9997600873571265e-05, "loss": 0.6059, "step": 261 }, { "epoch": 0.036914406481155335, "grad_norm": 1.3512776444819101, "learning_rate": 1.999749987289151e-05, "loss": 1.0646, "step": 262 }, { "epoch": 0.03705530116238112, "grad_norm": 1.1600402877986613, "learning_rate": 1.999739679007418e-05, "loss": 1.0, "step": 263 }, { "epoch": 0.0371961958436069, "grad_norm": 1.1447229411939601, "learning_rate": 1.9997291625140745e-05, "loss": 0.9643, "step": 264 }, { "epoch": 0.03733709052483269, "grad_norm": 1.2148456446330682, "learning_rate": 1.9997184378113107e-05, "loss": 1.0285, "step": 265 }, { "epoch": 0.037477985206058474, "grad_norm": 1.1172748989600572, "learning_rate": 1.9997075049013596e-05, "loss": 0.9676, "step": 266 }, { "epoch": 0.037618879887284254, "grad_norm": 1.6308065316132248, "learning_rate": 1.9996963637864985e-05, "loss": 0.5347, "step": 267 }, { "epoch": 0.03775977456851004, "grad_norm": 1.1693892213085, "learning_rate": 1.999685014469048e-05, "loss": 0.9777, "step": 268 }, { "epoch": 0.03790066924973582, "grad_norm": 1.21311705230955, "learning_rate": 1.9996734569513716e-05, "loss": 0.9932, "step": 269 }, { "epoch": 0.03804156393096161, "grad_norm": 1.2274225135454866, "learning_rate": 1.9996616912358766e-05, "loss": 0.99, "step": 270 }, { "epoch": 0.03818245861218739, "grad_norm": 1.161755754655035, "learning_rate": 1.9996497173250123e-05, "loss": 1.0111, "step": 271 }, { "epoch": 0.03832335329341317, "grad_norm": 1.1415784533932918, "learning_rate": 1.9996375352212734e-05, "loss": 1.0242, "step": 272 }, { "epoch": 0.03846424797463896, "grad_norm": 1.2079413989743821, "learning_rate": 1.9996251449271974e-05, "loss": 0.959, "step": 273 }, { "epoch": 0.03860514265586474, "grad_norm": 1.0962492047729504, "learning_rate": 1.999612546445364e-05, "loss": 1.0178, "step": 274 }, { "epoch": 0.038746037337090526, "grad_norm": 1.3895625737930195, "learning_rate": 1.999599739778397e-05, "loss": 0.5657, "step": 275 }, { "epoch": 0.038886932018316306, "grad_norm": 1.2147946988836806, "learning_rate": 1.999586724928964e-05, "loss": 1.0322, "step": 276 }, { "epoch": 0.03902782669954209, "grad_norm": 1.3270165160597271, "learning_rate": 1.999573501899775e-05, "loss": 0.98, "step": 277 }, { "epoch": 0.03916872138076788, "grad_norm": 1.154604992625494, "learning_rate": 1.9995600706935848e-05, "loss": 0.98, "step": 278 }, { "epoch": 0.03930961606199366, "grad_norm": 1.1059387876615927, "learning_rate": 1.99954643131319e-05, "loss": 1.0121, "step": 279 }, { "epoch": 0.039450510743219445, "grad_norm": 1.2213122990184948, "learning_rate": 1.9995325837614313e-05, "loss": 1.0007, "step": 280 }, { "epoch": 0.039591405424445225, "grad_norm": 1.0958026872877604, "learning_rate": 1.9995185280411928e-05, "loss": 0.991, "step": 281 }, { "epoch": 0.03973230010567101, "grad_norm": 1.3417375309098898, "learning_rate": 1.9995042641554022e-05, "loss": 0.9841, "step": 282 }, { "epoch": 0.03987319478689679, "grad_norm": 1.2052348893902904, "learning_rate": 1.999489792107029e-05, "loss": 1.0353, "step": 283 }, { "epoch": 0.04001408946812258, "grad_norm": 1.1860504168210058, "learning_rate": 1.9994751118990887e-05, "loss": 1.061, "step": 284 }, { "epoch": 0.040154984149348365, "grad_norm": 1.3108833183728075, "learning_rate": 1.9994602235346375e-05, "loss": 0.9914, "step": 285 }, { "epoch": 0.040295878830574144, "grad_norm": 1.578912871658581, "learning_rate": 1.9994451270167764e-05, "loss": 1.0319, "step": 286 }, { "epoch": 0.04043677351179993, "grad_norm": 1.4033449481675229, "learning_rate": 1.9994298223486503e-05, "loss": 1.0241, "step": 287 }, { "epoch": 0.04057766819302571, "grad_norm": 1.233386343374802, "learning_rate": 1.9994143095334456e-05, "loss": 0.9836, "step": 288 }, { "epoch": 0.0407185628742515, "grad_norm": 1.208544392162586, "learning_rate": 1.999398588574394e-05, "loss": 0.9416, "step": 289 }, { "epoch": 0.040859457555477284, "grad_norm": 1.1632905043086101, "learning_rate": 1.9993826594747687e-05, "loss": 1.0318, "step": 290 }, { "epoch": 0.041000352236703064, "grad_norm": 1.1609697418366591, "learning_rate": 1.999366522237888e-05, "loss": 0.4477, "step": 291 }, { "epoch": 0.04114124691792885, "grad_norm": 1.5110963974055136, "learning_rate": 1.999350176867112e-05, "loss": 0.5946, "step": 292 }, { "epoch": 0.04128214159915463, "grad_norm": 1.0036354399140175, "learning_rate": 1.9993336233658456e-05, "loss": 0.3687, "step": 293 }, { "epoch": 0.04142303628038042, "grad_norm": 1.4491929837047002, "learning_rate": 1.999316861737536e-05, "loss": 1.0041, "step": 294 }, { "epoch": 0.041563930961606196, "grad_norm": 1.3941236761868363, "learning_rate": 1.9992998919856738e-05, "loss": 0.5871, "step": 295 }, { "epoch": 0.04170482564283198, "grad_norm": 1.2853537670978716, "learning_rate": 1.9992827141137935e-05, "loss": 0.965, "step": 296 }, { "epoch": 0.04184572032405777, "grad_norm": 1.3474888460772236, "learning_rate": 1.999265328125473e-05, "loss": 1.0186, "step": 297 }, { "epoch": 0.04198661500528355, "grad_norm": 1.3200668822753001, "learning_rate": 1.9992477340243325e-05, "loss": 0.9546, "step": 298 }, { "epoch": 0.042127509686509336, "grad_norm": 1.0759051746084303, "learning_rate": 1.999229931814037e-05, "loss": 0.9846, "step": 299 }, { "epoch": 0.042268404367735116, "grad_norm": 1.138276135251764, "learning_rate": 1.9992119214982932e-05, "loss": 1.0782, "step": 300 }, { "epoch": 0.0424092990489609, "grad_norm": 1.2763719271790568, "learning_rate": 1.9991937030808532e-05, "loss": 0.4497, "step": 301 }, { "epoch": 0.04255019373018668, "grad_norm": 1.4881952946440582, "learning_rate": 1.99917527656551e-05, "loss": 0.5029, "step": 302 }, { "epoch": 0.04269108841141247, "grad_norm": 1.4007927924286825, "learning_rate": 1.999156641956102e-05, "loss": 0.9935, "step": 303 }, { "epoch": 0.042831983092638255, "grad_norm": 1.1832056018760377, "learning_rate": 1.9991377992565103e-05, "loss": 1.0391, "step": 304 }, { "epoch": 0.042972877773864035, "grad_norm": 1.4882696476166388, "learning_rate": 1.9991187484706588e-05, "loss": 0.5173, "step": 305 }, { "epoch": 0.04311377245508982, "grad_norm": 1.2109950712783157, "learning_rate": 1.999099489602515e-05, "loss": 0.9712, "step": 306 }, { "epoch": 0.0432546671363156, "grad_norm": 1.256543587731877, "learning_rate": 1.9990800226560898e-05, "loss": 0.9609, "step": 307 }, { "epoch": 0.04339556181754139, "grad_norm": 1.0910863363645584, "learning_rate": 1.9990603476354378e-05, "loss": 0.3912, "step": 308 }, { "epoch": 0.043536456498767175, "grad_norm": 1.246930265800913, "learning_rate": 1.9990404645446572e-05, "loss": 1.0018, "step": 309 }, { "epoch": 0.043677351179992954, "grad_norm": 1.1959220897103429, "learning_rate": 1.999020373387888e-05, "loss": 1.0107, "step": 310 }, { "epoch": 0.04381824586121874, "grad_norm": 1.225784154869408, "learning_rate": 1.999000074169315e-05, "loss": 1.0169, "step": 311 }, { "epoch": 0.04395914054244452, "grad_norm": 1.2074640293217693, "learning_rate": 1.9989795668931652e-05, "loss": 0.9759, "step": 312 }, { "epoch": 0.04410003522367031, "grad_norm": 1.3583398710805064, "learning_rate": 1.9989588515637107e-05, "loss": 1.0595, "step": 313 }, { "epoch": 0.04424092990489609, "grad_norm": 1.2904502399392173, "learning_rate": 1.9989379281852647e-05, "loss": 0.4662, "step": 314 }, { "epoch": 0.044381824586121874, "grad_norm": 1.2055757176261148, "learning_rate": 1.9989167967621858e-05, "loss": 1.018, "step": 315 }, { "epoch": 0.04452271926734766, "grad_norm": 1.1552178151446717, "learning_rate": 1.9988954572988738e-05, "loss": 0.9761, "step": 316 }, { "epoch": 0.04466361394857344, "grad_norm": 1.2589546868714285, "learning_rate": 1.9988739097997742e-05, "loss": 0.5579, "step": 317 }, { "epoch": 0.044804508629799226, "grad_norm": 1.306551562876987, "learning_rate": 1.9988521542693736e-05, "loss": 0.5497, "step": 318 }, { "epoch": 0.044945403311025006, "grad_norm": 1.2420769223058947, "learning_rate": 1.9988301907122033e-05, "loss": 0.4918, "step": 319 }, { "epoch": 0.04508629799225079, "grad_norm": 1.3075304110860597, "learning_rate": 1.998808019132838e-05, "loss": 1.0041, "step": 320 }, { "epoch": 0.04522719267347658, "grad_norm": 1.1851129715058262, "learning_rate": 1.9987856395358945e-05, "loss": 0.9806, "step": 321 }, { "epoch": 0.04536808735470236, "grad_norm": 1.417553880132584, "learning_rate": 1.9987630519260345e-05, "loss": 0.5334, "step": 322 }, { "epoch": 0.045508982035928146, "grad_norm": 1.1446365112121037, "learning_rate": 1.9987402563079616e-05, "loss": 1.0078, "step": 323 }, { "epoch": 0.045649876717153925, "grad_norm": 1.3189714113287423, "learning_rate": 1.9987172526864235e-05, "loss": 0.992, "step": 324 }, { "epoch": 0.04579077139837971, "grad_norm": 1.317197817521072, "learning_rate": 1.9986940410662114e-05, "loss": 1.0425, "step": 325 }, { "epoch": 0.04593166607960549, "grad_norm": 1.2777850575415641, "learning_rate": 1.9986706214521593e-05, "loss": 0.9799, "step": 326 }, { "epoch": 0.04607256076083128, "grad_norm": 1.2652863858671304, "learning_rate": 1.9986469938491443e-05, "loss": 0.9759, "step": 327 }, { "epoch": 0.046213455442057065, "grad_norm": 1.5018471465300813, "learning_rate": 1.998623158262088e-05, "loss": 0.6632, "step": 328 }, { "epoch": 0.046354350123282845, "grad_norm": 1.1844110509710497, "learning_rate": 1.998599114695954e-05, "loss": 0.9771, "step": 329 }, { "epoch": 0.04649524480450863, "grad_norm": 1.3212675594992134, "learning_rate": 1.9985748631557495e-05, "loss": 0.4818, "step": 330 }, { "epoch": 0.04663613948573441, "grad_norm": 1.381389789654548, "learning_rate": 1.9985504036465258e-05, "loss": 0.9915, "step": 331 }, { "epoch": 0.0467770341669602, "grad_norm": 1.1326752713815145, "learning_rate": 1.9985257361733767e-05, "loss": 1.0022, "step": 332 }, { "epoch": 0.046917928848185984, "grad_norm": 1.20181671905511, "learning_rate": 1.99850086074144e-05, "loss": 0.943, "step": 333 }, { "epoch": 0.047058823529411764, "grad_norm": 1.1534232905231603, "learning_rate": 1.998475777355896e-05, "loss": 1.0299, "step": 334 }, { "epoch": 0.04719971821063755, "grad_norm": 1.253933860534097, "learning_rate": 1.9984504860219684e-05, "loss": 0.9878, "step": 335 }, { "epoch": 0.04734061289186333, "grad_norm": 1.3796147318395846, "learning_rate": 1.9984249867449255e-05, "loss": 0.4581, "step": 336 }, { "epoch": 0.04748150757308912, "grad_norm": 1.1751616618785223, "learning_rate": 1.998399279530077e-05, "loss": 0.9861, "step": 337 }, { "epoch": 0.0476224022543149, "grad_norm": 1.3633873759778203, "learning_rate": 1.998373364382777e-05, "loss": 1.0081, "step": 338 }, { "epoch": 0.04776329693554068, "grad_norm": 1.3215671990095859, "learning_rate": 1.9983472413084236e-05, "loss": 0.553, "step": 339 }, { "epoch": 0.04790419161676647, "grad_norm": 1.122062289666149, "learning_rate": 1.998320910312456e-05, "loss": 0.9337, "step": 340 }, { "epoch": 0.04804508629799225, "grad_norm": 1.0840655442684122, "learning_rate": 1.998294371400359e-05, "loss": 0.9568, "step": 341 }, { "epoch": 0.048185980979218036, "grad_norm": 1.1703913068473004, "learning_rate": 1.9982676245776596e-05, "loss": 1.0044, "step": 342 }, { "epoch": 0.048326875660443816, "grad_norm": 1.34833461052654, "learning_rate": 1.9982406698499283e-05, "loss": 0.5133, "step": 343 }, { "epoch": 0.0484677703416696, "grad_norm": 1.1104812198624363, "learning_rate": 1.998213507222778e-05, "loss": 0.9786, "step": 344 }, { "epoch": 0.04860866502289538, "grad_norm": 1.3291763059354271, "learning_rate": 1.998186136701867e-05, "loss": 0.4992, "step": 345 }, { "epoch": 0.04874955970412117, "grad_norm": 1.205863063071479, "learning_rate": 1.998158558292895e-05, "loss": 0.4709, "step": 346 }, { "epoch": 0.048890454385346956, "grad_norm": 1.0809791589207176, "learning_rate": 1.998130772001605e-05, "loss": 1.0012, "step": 347 }, { "epoch": 0.049031349066572735, "grad_norm": 1.4841300678314053, "learning_rate": 1.9981027778337855e-05, "loss": 0.6537, "step": 348 }, { "epoch": 0.04917224374779852, "grad_norm": 1.1771855577783652, "learning_rate": 1.9980745757952657e-05, "loss": 0.9913, "step": 349 }, { "epoch": 0.0493131384290243, "grad_norm": 1.2611414533813468, "learning_rate": 1.998046165891919e-05, "loss": 0.5258, "step": 350 }, { "epoch": 0.04945403311025009, "grad_norm": 1.1960086691133456, "learning_rate": 1.9980175481296627e-05, "loss": 0.9222, "step": 351 }, { "epoch": 0.049594927791475875, "grad_norm": 1.2371687840233672, "learning_rate": 1.9979887225144565e-05, "loss": 0.4986, "step": 352 }, { "epoch": 0.049735822472701655, "grad_norm": 1.189624690194973, "learning_rate": 1.997959689052304e-05, "loss": 0.4482, "step": 353 }, { "epoch": 0.04987671715392744, "grad_norm": 1.2266141974634683, "learning_rate": 1.997930447749252e-05, "loss": 1.0369, "step": 354 }, { "epoch": 0.05001761183515322, "grad_norm": 1.2834963025122028, "learning_rate": 1.99790099861139e-05, "loss": 0.5076, "step": 355 }, { "epoch": 0.05015850651637901, "grad_norm": 1.071423307177304, "learning_rate": 1.997871341644852e-05, "loss": 0.9403, "step": 356 }, { "epoch": 0.05029940119760479, "grad_norm": 1.4877155972526122, "learning_rate": 1.9978414768558143e-05, "loss": 0.9863, "step": 357 }, { "epoch": 0.050440295878830574, "grad_norm": 1.0265882247754696, "learning_rate": 1.997811404250496e-05, "loss": 0.9387, "step": 358 }, { "epoch": 0.05058119056005636, "grad_norm": 1.0978821473053981, "learning_rate": 1.9977811238351613e-05, "loss": 0.9869, "step": 359 }, { "epoch": 0.05072208524128214, "grad_norm": 1.1774557982476967, "learning_rate": 1.9977506356161155e-05, "loss": 1.0195, "step": 360 }, { "epoch": 0.05086297992250793, "grad_norm": 1.5780742462386772, "learning_rate": 1.9977199395997088e-05, "loss": 0.5801, "step": 361 }, { "epoch": 0.051003874603733707, "grad_norm": 1.2966981547038965, "learning_rate": 1.997689035792334e-05, "loss": 0.9818, "step": 362 }, { "epoch": 0.05114476928495949, "grad_norm": 1.2007444461211496, "learning_rate": 1.9976579242004276e-05, "loss": 0.9884, "step": 363 }, { "epoch": 0.05128566396618528, "grad_norm": 1.28452581731371, "learning_rate": 1.9976266048304687e-05, "loss": 0.5117, "step": 364 }, { "epoch": 0.05142655864741106, "grad_norm": 1.1649544777001735, "learning_rate": 1.9975950776889806e-05, "loss": 0.9995, "step": 365 }, { "epoch": 0.051567453328636846, "grad_norm": 1.2273793274205405, "learning_rate": 1.997563342782529e-05, "loss": 0.9733, "step": 366 }, { "epoch": 0.051708348009862626, "grad_norm": 1.4721690161782002, "learning_rate": 1.9975314001177227e-05, "loss": 0.5652, "step": 367 }, { "epoch": 0.05184924269108841, "grad_norm": 1.1336541452913795, "learning_rate": 1.9974992497012146e-05, "loss": 0.9823, "step": 368 }, { "epoch": 0.05199013737231419, "grad_norm": 1.0475865945427363, "learning_rate": 1.997466891539701e-05, "loss": 0.9123, "step": 369 }, { "epoch": 0.05213103205353998, "grad_norm": 1.4813222141867892, "learning_rate": 1.9974343256399208e-05, "loss": 0.4986, "step": 370 }, { "epoch": 0.052271926734765765, "grad_norm": 1.4060623315705116, "learning_rate": 1.9974015520086558e-05, "loss": 1.0317, "step": 371 }, { "epoch": 0.052412821415991545, "grad_norm": 1.3168631501551649, "learning_rate": 1.9973685706527322e-05, "loss": 1.0082, "step": 372 }, { "epoch": 0.05255371609721733, "grad_norm": 1.1551278403477996, "learning_rate": 1.997335381579019e-05, "loss": 0.9735, "step": 373 }, { "epoch": 0.05269461077844311, "grad_norm": 1.381727045633314, "learning_rate": 1.997301984794428e-05, "loss": 0.4852, "step": 374 }, { "epoch": 0.0528355054596689, "grad_norm": 1.2058137501909925, "learning_rate": 1.997268380305914e-05, "loss": 1.0239, "step": 375 }, { "epoch": 0.052976400140894685, "grad_norm": 1.5146439887134562, "learning_rate": 1.997234568120477e-05, "loss": 0.6923, "step": 376 }, { "epoch": 0.053117294822120464, "grad_norm": 1.4019609811739828, "learning_rate": 1.997200548245158e-05, "loss": 0.5952, "step": 377 }, { "epoch": 0.05325818950334625, "grad_norm": 1.2044350068025955, "learning_rate": 1.9971663206870426e-05, "loss": 1.0595, "step": 378 }, { "epoch": 0.05339908418457203, "grad_norm": 1.0599231952147887, "learning_rate": 1.997131885453259e-05, "loss": 0.3906, "step": 379 }, { "epoch": 0.05353997886579782, "grad_norm": 1.180789018658934, "learning_rate": 1.9970972425509792e-05, "loss": 0.9281, "step": 380 }, { "epoch": 0.0536808735470236, "grad_norm": 1.395719142492717, "learning_rate": 1.9970623919874177e-05, "loss": 0.4777, "step": 381 }, { "epoch": 0.053821768228249384, "grad_norm": 1.5336525076132634, "learning_rate": 1.9970273337698328e-05, "loss": 0.6308, "step": 382 }, { "epoch": 0.05396266290947517, "grad_norm": 1.232450207194063, "learning_rate": 1.9969920679055263e-05, "loss": 0.4354, "step": 383 }, { "epoch": 0.05410355759070095, "grad_norm": 1.447058187545903, "learning_rate": 1.9969565944018423e-05, "loss": 0.5864, "step": 384 }, { "epoch": 0.05424445227192674, "grad_norm": 1.1115294052110054, "learning_rate": 1.996920913266169e-05, "loss": 0.9883, "step": 385 }, { "epoch": 0.054385346953152516, "grad_norm": 1.1497571541876517, "learning_rate": 1.9968850245059376e-05, "loss": 0.962, "step": 386 }, { "epoch": 0.0545262416343783, "grad_norm": 1.3340381435444508, "learning_rate": 1.996848928128623e-05, "loss": 1.0142, "step": 387 }, { "epoch": 0.05466713631560408, "grad_norm": 1.3376824752321883, "learning_rate": 1.9968126241417415e-05, "loss": 0.5096, "step": 388 }, { "epoch": 0.05480803099682987, "grad_norm": 1.170887248904365, "learning_rate": 1.9967761125528554e-05, "loss": 0.9819, "step": 389 }, { "epoch": 0.054948925678055656, "grad_norm": 1.688822735053544, "learning_rate": 1.996739393369568e-05, "loss": 0.6562, "step": 390 }, { "epoch": 0.055089820359281436, "grad_norm": 1.1752577110831222, "learning_rate": 1.9967024665995267e-05, "loss": 0.4597, "step": 391 }, { "epoch": 0.05523071504050722, "grad_norm": 1.1539770601213168, "learning_rate": 1.996665332250422e-05, "loss": 0.9989, "step": 392 }, { "epoch": 0.055371609721733, "grad_norm": 1.367292437623798, "learning_rate": 1.9966279903299887e-05, "loss": 0.9612, "step": 393 }, { "epoch": 0.05551250440295879, "grad_norm": 1.3337438002191575, "learning_rate": 1.9965904408460025e-05, "loss": 1.0267, "step": 394 }, { "epoch": 0.055653399084184575, "grad_norm": 1.0323608610256385, "learning_rate": 1.9965526838062846e-05, "loss": 0.9588, "step": 395 }, { "epoch": 0.055794293765410355, "grad_norm": 1.0930301446232118, "learning_rate": 1.996514719218698e-05, "loss": 1.0197, "step": 396 }, { "epoch": 0.05593518844663614, "grad_norm": 1.083767308137378, "learning_rate": 1.9964765470911494e-05, "loss": 0.9873, "step": 397 }, { "epoch": 0.05607608312786192, "grad_norm": 1.1760514398157365, "learning_rate": 1.996438167431589e-05, "loss": 0.9834, "step": 398 }, { "epoch": 0.05621697780908771, "grad_norm": 1.454868856977096, "learning_rate": 1.9963995802480105e-05, "loss": 0.5902, "step": 399 }, { "epoch": 0.05635787249031349, "grad_norm": 1.1656225424090283, "learning_rate": 1.9963607855484492e-05, "loss": 0.9481, "step": 400 }, { "epoch": 0.056498767171539274, "grad_norm": 1.4142372730958697, "learning_rate": 1.9963217833409854e-05, "loss": 0.5947, "step": 401 }, { "epoch": 0.05663966185276506, "grad_norm": 1.0807469568213401, "learning_rate": 1.9962825736337416e-05, "loss": 1.0237, "step": 402 }, { "epoch": 0.05678055653399084, "grad_norm": 1.130453196005577, "learning_rate": 1.9962431564348842e-05, "loss": 0.9849, "step": 403 }, { "epoch": 0.05692145121521663, "grad_norm": 1.3430518928970059, "learning_rate": 1.996203531752622e-05, "loss": 0.6141, "step": 404 }, { "epoch": 0.05706234589644241, "grad_norm": 1.2269950803339225, "learning_rate": 1.996163699595208e-05, "loss": 0.9286, "step": 405 }, { "epoch": 0.057203240577668193, "grad_norm": 1.3371871534025144, "learning_rate": 1.9961236599709376e-05, "loss": 0.6006, "step": 406 }, { "epoch": 0.05734413525889398, "grad_norm": 1.2240652498816729, "learning_rate": 1.9960834128881494e-05, "loss": 0.9987, "step": 407 }, { "epoch": 0.05748502994011976, "grad_norm": 1.2731045630379911, "learning_rate": 1.9960429583552262e-05, "loss": 0.9559, "step": 408 }, { "epoch": 0.057625924621345546, "grad_norm": 1.200141028231138, "learning_rate": 1.996002296380592e-05, "loss": 0.9714, "step": 409 }, { "epoch": 0.057766819302571326, "grad_norm": 1.2050126066058668, "learning_rate": 1.9959614269727172e-05, "loss": 0.9214, "step": 410 }, { "epoch": 0.05790771398379711, "grad_norm": 1.1587330007791727, "learning_rate": 1.9959203501401118e-05, "loss": 0.9923, "step": 411 }, { "epoch": 0.05804860866502289, "grad_norm": 1.0650120592006553, "learning_rate": 1.9958790658913315e-05, "loss": 0.4261, "step": 412 }, { "epoch": 0.05818950334624868, "grad_norm": 1.470218772673152, "learning_rate": 1.9958375742349748e-05, "loss": 0.5677, "step": 413 }, { "epoch": 0.058330398027474466, "grad_norm": 1.1588697471617853, "learning_rate": 1.9957958751796818e-05, "loss": 1.0061, "step": 414 }, { "epoch": 0.058471292708700245, "grad_norm": 1.2388394170278358, "learning_rate": 1.995753968734138e-05, "loss": 0.9687, "step": 415 }, { "epoch": 0.05861218738992603, "grad_norm": 1.2155233327367367, "learning_rate": 1.9957118549070705e-05, "loss": 0.9963, "step": 416 }, { "epoch": 0.05875308207115181, "grad_norm": 1.2080183226793901, "learning_rate": 1.9956695337072504e-05, "loss": 1.0197, "step": 417 }, { "epoch": 0.0588939767523776, "grad_norm": 1.4044448498252506, "learning_rate": 1.995627005143492e-05, "loss": 0.5345, "step": 418 }, { "epoch": 0.05903487143360338, "grad_norm": 1.238058709957381, "learning_rate": 1.995584269224652e-05, "loss": 0.9682, "step": 419 }, { "epoch": 0.059175766114829165, "grad_norm": 1.1308819242046593, "learning_rate": 1.9955413259596314e-05, "loss": 1.0004, "step": 420 }, { "epoch": 0.05931666079605495, "grad_norm": 1.222750148573153, "learning_rate": 1.9954981753573733e-05, "loss": 0.9842, "step": 421 }, { "epoch": 0.05945755547728073, "grad_norm": 1.4344901084935178, "learning_rate": 1.995454817426865e-05, "loss": 0.4959, "step": 422 }, { "epoch": 0.05959845015850652, "grad_norm": 1.1677949005407573, "learning_rate": 1.995411252177136e-05, "loss": 0.9399, "step": 423 }, { "epoch": 0.0597393448397323, "grad_norm": 1.3430679764982074, "learning_rate": 1.99536747961726e-05, "loss": 0.5204, "step": 424 }, { "epoch": 0.059880239520958084, "grad_norm": 1.1289560287137854, "learning_rate": 1.995323499756353e-05, "loss": 1.0214, "step": 425 }, { "epoch": 0.06002113420218387, "grad_norm": 1.375311739769213, "learning_rate": 1.9952793126035742e-05, "loss": 0.7101, "step": 426 }, { "epoch": 0.06016202888340965, "grad_norm": 1.1471992596706517, "learning_rate": 1.995234918168127e-05, "loss": 0.9745, "step": 427 }, { "epoch": 0.06030292356463544, "grad_norm": 1.22653506312674, "learning_rate": 1.9951903164592562e-05, "loss": 1.0265, "step": 428 }, { "epoch": 0.06044381824586122, "grad_norm": 1.516156380315246, "learning_rate": 1.9951455074862517e-05, "loss": 0.6449, "step": 429 }, { "epoch": 0.060584712927087, "grad_norm": 1.2375559583618885, "learning_rate": 1.9951004912584455e-05, "loss": 1.0039, "step": 430 }, { "epoch": 0.06072560760831278, "grad_norm": 1.0885405459189321, "learning_rate": 1.995055267785213e-05, "loss": 0.9718, "step": 431 }, { "epoch": 0.06086650228953857, "grad_norm": 1.1309883252166721, "learning_rate": 1.9950098370759723e-05, "loss": 0.9395, "step": 432 }, { "epoch": 0.061007396970764356, "grad_norm": 1.4097952212120801, "learning_rate": 1.9949641991401854e-05, "loss": 0.4423, "step": 433 }, { "epoch": 0.061148291651990136, "grad_norm": 1.1968005648270568, "learning_rate": 1.994918353987357e-05, "loss": 0.4543, "step": 434 }, { "epoch": 0.06128918633321592, "grad_norm": 1.2010360161553661, "learning_rate": 1.9948723016270355e-05, "loss": 1.0094, "step": 435 }, { "epoch": 0.0614300810144417, "grad_norm": 1.3529123910643952, "learning_rate": 1.9948260420688113e-05, "loss": 0.5601, "step": 436 }, { "epoch": 0.06157097569566749, "grad_norm": 1.2292137614831056, "learning_rate": 1.994779575322319e-05, "loss": 0.9975, "step": 437 }, { "epoch": 0.061711870376893276, "grad_norm": 1.328519040031899, "learning_rate": 1.994732901397236e-05, "loss": 0.4708, "step": 438 }, { "epoch": 0.061852765058119055, "grad_norm": 1.4555102952729253, "learning_rate": 1.9946860203032833e-05, "loss": 0.4378, "step": 439 }, { "epoch": 0.06199365973934484, "grad_norm": 1.4205477158203765, "learning_rate": 1.9946389320502243e-05, "loss": 0.5711, "step": 440 }, { "epoch": 0.06213455442057062, "grad_norm": 1.3181945792483436, "learning_rate": 1.994591636647866e-05, "loss": 0.9785, "step": 441 }, { "epoch": 0.06227544910179641, "grad_norm": 1.0762121220255894, "learning_rate": 1.9945441341060577e-05, "loss": 0.9535, "step": 442 }, { "epoch": 0.06241634378302219, "grad_norm": 1.3639294833194708, "learning_rate": 1.9944964244346936e-05, "loss": 0.9754, "step": 443 }, { "epoch": 0.06255723846424797, "grad_norm": 1.2245181529808828, "learning_rate": 1.994448507643709e-05, "loss": 0.4455, "step": 444 }, { "epoch": 0.06269813314547376, "grad_norm": 1.2302594250338952, "learning_rate": 1.9944003837430845e-05, "loss": 0.953, "step": 445 }, { "epoch": 0.06283902782669955, "grad_norm": 1.1819451404709163, "learning_rate": 1.994352052742842e-05, "loss": 0.9417, "step": 446 }, { "epoch": 0.06297992250792532, "grad_norm": 1.08840093258579, "learning_rate": 1.994303514653047e-05, "loss": 0.9155, "step": 447 }, { "epoch": 0.06312081718915111, "grad_norm": 1.3544606756786628, "learning_rate": 1.9942547694838084e-05, "loss": 1.0013, "step": 448 }, { "epoch": 0.0632617118703769, "grad_norm": 1.0766850671998276, "learning_rate": 1.9942058172452783e-05, "loss": 1.0081, "step": 449 }, { "epoch": 0.06340260655160268, "grad_norm": 1.5475738066053926, "learning_rate": 1.994156657947652e-05, "loss": 0.5986, "step": 450 }, { "epoch": 0.06354350123282847, "grad_norm": 1.1458178541743471, "learning_rate": 1.9941072916011673e-05, "loss": 1.0422, "step": 451 }, { "epoch": 0.06368439591405424, "grad_norm": 1.4265688369897027, "learning_rate": 1.994057718216106e-05, "loss": 0.4766, "step": 452 }, { "epoch": 0.06382529059528003, "grad_norm": 1.3021377783967558, "learning_rate": 1.994007937802792e-05, "loss": 1.0503, "step": 453 }, { "epoch": 0.06396618527650581, "grad_norm": 1.3937818616487911, "learning_rate": 1.9939579503715932e-05, "loss": 0.5941, "step": 454 }, { "epoch": 0.0641070799577316, "grad_norm": 1.4481621977602241, "learning_rate": 1.9939077559329202e-05, "loss": 0.6455, "step": 455 }, { "epoch": 0.06424797463895737, "grad_norm": 1.205162780555382, "learning_rate": 1.9938573544972266e-05, "loss": 0.9679, "step": 456 }, { "epoch": 0.06438886932018316, "grad_norm": 1.0800491726543078, "learning_rate": 1.99380674607501e-05, "loss": 0.9729, "step": 457 }, { "epoch": 0.06452976400140895, "grad_norm": 1.2498696395075128, "learning_rate": 1.993755930676809e-05, "loss": 0.5122, "step": 458 }, { "epoch": 0.06467065868263473, "grad_norm": 1.2208285145962559, "learning_rate": 1.9937049083132082e-05, "loss": 0.9804, "step": 459 }, { "epoch": 0.06481155336386052, "grad_norm": 1.0985225788679276, "learning_rate": 1.993653678994833e-05, "loss": 0.9528, "step": 460 }, { "epoch": 0.06495244804508629, "grad_norm": 1.2561249293073535, "learning_rate": 1.9936022427323528e-05, "loss": 0.9637, "step": 461 }, { "epoch": 0.06509334272631208, "grad_norm": 1.3410052914100468, "learning_rate": 1.9935505995364804e-05, "loss": 0.47, "step": 462 }, { "epoch": 0.06523423740753787, "grad_norm": 1.3056085898651753, "learning_rate": 1.993498749417971e-05, "loss": 0.4878, "step": 463 }, { "epoch": 0.06537513208876365, "grad_norm": 1.4132715917381748, "learning_rate": 1.993446692387623e-05, "loss": 0.5599, "step": 464 }, { "epoch": 0.06551602676998944, "grad_norm": 1.313421218792048, "learning_rate": 1.9933944284562785e-05, "loss": 0.5502, "step": 465 }, { "epoch": 0.06565692145121521, "grad_norm": 1.4007118349983894, "learning_rate": 1.993341957634822e-05, "loss": 1.0364, "step": 466 }, { "epoch": 0.065797816132441, "grad_norm": 1.4282441171192153, "learning_rate": 1.9932892799341816e-05, "loss": 0.5909, "step": 467 }, { "epoch": 0.06593871081366678, "grad_norm": 1.348754728686354, "learning_rate": 1.9932363953653282e-05, "loss": 0.5102, "step": 468 }, { "epoch": 0.06607960549489257, "grad_norm": 1.1598362965741205, "learning_rate": 1.993183303939276e-05, "loss": 0.9878, "step": 469 }, { "epoch": 0.06622050017611836, "grad_norm": 1.1795169371391399, "learning_rate": 1.993130005667082e-05, "loss": 0.9842, "step": 470 }, { "epoch": 0.06636139485734413, "grad_norm": 1.1819918904429472, "learning_rate": 1.993076500559846e-05, "loss": 0.9683, "step": 471 }, { "epoch": 0.06650228953856992, "grad_norm": 1.0935513422919307, "learning_rate": 1.9930227886287115e-05, "loss": 0.9443, "step": 472 }, { "epoch": 0.0666431842197957, "grad_norm": 1.640412259621592, "learning_rate": 1.9929688698848654e-05, "loss": 0.4913, "step": 473 }, { "epoch": 0.06678407890102149, "grad_norm": 1.2363492600276846, "learning_rate": 1.992914744339537e-05, "loss": 0.9642, "step": 474 }, { "epoch": 0.06692497358224728, "grad_norm": 1.2126683284530326, "learning_rate": 1.992860412003998e-05, "loss": 1.0465, "step": 475 }, { "epoch": 0.06706586826347305, "grad_norm": 1.0438804041701115, "learning_rate": 1.992805872889565e-05, "loss": 0.9535, "step": 476 }, { "epoch": 0.06720676294469884, "grad_norm": 1.289957919819202, "learning_rate": 1.992751127007596e-05, "loss": 0.9631, "step": 477 }, { "epoch": 0.06734765762592462, "grad_norm": 1.376368129623413, "learning_rate": 1.9926961743694925e-05, "loss": 0.4755, "step": 478 }, { "epoch": 0.06748855230715041, "grad_norm": 1.4260025916811947, "learning_rate": 1.9926410149867e-05, "loss": 0.5483, "step": 479 }, { "epoch": 0.06762944698837618, "grad_norm": 1.3913846321741337, "learning_rate": 1.9925856488707055e-05, "loss": 0.9708, "step": 480 }, { "epoch": 0.06777034166960197, "grad_norm": 1.2330336618200681, "learning_rate": 1.9925300760330408e-05, "loss": 0.4729, "step": 481 }, { "epoch": 0.06791123635082776, "grad_norm": 1.2905992456613313, "learning_rate": 1.9924742964852788e-05, "loss": 1.0159, "step": 482 }, { "epoch": 0.06805213103205354, "grad_norm": 1.2618690448107657, "learning_rate": 1.9924183102390374e-05, "loss": 1.0085, "step": 483 }, { "epoch": 0.06819302571327933, "grad_norm": 1.3795342801659431, "learning_rate": 1.9923621173059762e-05, "loss": 0.9777, "step": 484 }, { "epoch": 0.0683339203945051, "grad_norm": 1.193937685727893, "learning_rate": 1.9923057176977978e-05, "loss": 0.9982, "step": 485 }, { "epoch": 0.06847481507573089, "grad_norm": 1.3215879565490587, "learning_rate": 1.992249111426249e-05, "loss": 0.5162, "step": 486 }, { "epoch": 0.06861570975695667, "grad_norm": 1.068127607896887, "learning_rate": 1.992192298503119e-05, "loss": 0.9836, "step": 487 }, { "epoch": 0.06875660443818246, "grad_norm": 1.2678446195570248, "learning_rate": 1.99213527894024e-05, "loss": 0.4984, "step": 488 }, { "epoch": 0.06889749911940825, "grad_norm": 1.2527646788316682, "learning_rate": 1.992078052749486e-05, "loss": 0.4392, "step": 489 }, { "epoch": 0.06903839380063402, "grad_norm": 1.1750617013176885, "learning_rate": 1.992020619942777e-05, "loss": 1.0277, "step": 490 }, { "epoch": 0.06917928848185981, "grad_norm": 1.2403261887802266, "learning_rate": 1.991962980532073e-05, "loss": 0.9756, "step": 491 }, { "epoch": 0.0693201831630856, "grad_norm": 1.1336572083329832, "learning_rate": 1.991905134529379e-05, "loss": 0.9454, "step": 492 }, { "epoch": 0.06946107784431138, "grad_norm": 1.2873883528969436, "learning_rate": 1.9918470819467423e-05, "loss": 0.4218, "step": 493 }, { "epoch": 0.06960197252553717, "grad_norm": 1.1278418087751925, "learning_rate": 1.991788822796253e-05, "loss": 0.9536, "step": 494 }, { "epoch": 0.06974286720676294, "grad_norm": 1.1067257991262338, "learning_rate": 1.991730357090045e-05, "loss": 1.0073, "step": 495 }, { "epoch": 0.06988376188798873, "grad_norm": 1.1572552494377861, "learning_rate": 1.9916716848402937e-05, "loss": 0.9898, "step": 496 }, { "epoch": 0.07002465656921451, "grad_norm": 1.2255026609403836, "learning_rate": 1.9916128060592198e-05, "loss": 0.9905, "step": 497 }, { "epoch": 0.0701655512504403, "grad_norm": 1.1467055054008317, "learning_rate": 1.9915537207590845e-05, "loss": 0.9815, "step": 498 }, { "epoch": 0.07030644593166607, "grad_norm": 1.2922177243096993, "learning_rate": 1.991494428952194e-05, "loss": 0.466, "step": 499 }, { "epoch": 0.07044734061289186, "grad_norm": 1.3826989246718029, "learning_rate": 1.991434930650897e-05, "loss": 0.9481, "step": 500 }, { "epoch": 0.07058823529411765, "grad_norm": 1.2571607373417293, "learning_rate": 1.9913752258675843e-05, "loss": 0.4296, "step": 501 }, { "epoch": 0.07072912997534343, "grad_norm": 1.1205382631037277, "learning_rate": 1.9913153146146908e-05, "loss": 0.966, "step": 502 }, { "epoch": 0.07087002465656922, "grad_norm": 1.1149616456699456, "learning_rate": 1.9912551969046934e-05, "loss": 1.0136, "step": 503 }, { "epoch": 0.07101091933779499, "grad_norm": 1.0949791796194008, "learning_rate": 1.9911948727501134e-05, "loss": 0.9764, "step": 504 }, { "epoch": 0.07115181401902078, "grad_norm": 1.082557266850204, "learning_rate": 1.9911343421635133e-05, "loss": 0.9501, "step": 505 }, { "epoch": 0.07129270870024657, "grad_norm": 1.0687223195045952, "learning_rate": 1.9910736051575007e-05, "loss": 0.9305, "step": 506 }, { "epoch": 0.07143360338147235, "grad_norm": 1.12381737829494, "learning_rate": 1.991012661744724e-05, "loss": 0.3771, "step": 507 }, { "epoch": 0.07157449806269814, "grad_norm": 1.2535734474318079, "learning_rate": 1.990951511937876e-05, "loss": 0.4728, "step": 508 }, { "epoch": 0.07171539274392391, "grad_norm": 1.2269784597978541, "learning_rate": 1.9908901557496928e-05, "loss": 0.4722, "step": 509 }, { "epoch": 0.0718562874251497, "grad_norm": 1.08423077316261, "learning_rate": 1.9908285931929514e-05, "loss": 0.9928, "step": 510 }, { "epoch": 0.07199718210637548, "grad_norm": 1.1557150161517022, "learning_rate": 1.9907668242804744e-05, "loss": 0.9863, "step": 511 }, { "epoch": 0.07213807678760127, "grad_norm": 1.3215704215872073, "learning_rate": 1.9907048490251254e-05, "loss": 0.6106, "step": 512 }, { "epoch": 0.07227897146882706, "grad_norm": 1.431989692504383, "learning_rate": 1.9906426674398123e-05, "loss": 0.5666, "step": 513 }, { "epoch": 0.07241986615005283, "grad_norm": 1.5350647274054694, "learning_rate": 1.9905802795374848e-05, "loss": 0.6422, "step": 514 }, { "epoch": 0.07256076083127862, "grad_norm": 1.3538395223946489, "learning_rate": 1.9905176853311367e-05, "loss": 0.5778, "step": 515 }, { "epoch": 0.0727016555125044, "grad_norm": 1.4412930770647825, "learning_rate": 1.990454884833804e-05, "loss": 0.53, "step": 516 }, { "epoch": 0.07284255019373019, "grad_norm": 1.4331621733055853, "learning_rate": 1.990391878058566e-05, "loss": 0.5158, "step": 517 }, { "epoch": 0.07298344487495598, "grad_norm": 1.1889457842714897, "learning_rate": 1.9903286650185446e-05, "loss": 0.9646, "step": 518 }, { "epoch": 0.07312433955618175, "grad_norm": 1.0740284867051693, "learning_rate": 1.9902652457269053e-05, "loss": 1.0095, "step": 519 }, { "epoch": 0.07326523423740754, "grad_norm": 1.2319193753806756, "learning_rate": 1.9902016201968556e-05, "loss": 0.9508, "step": 520 }, { "epoch": 0.07340612891863332, "grad_norm": 1.283114737902217, "learning_rate": 1.990137788441647e-05, "loss": 0.518, "step": 521 }, { "epoch": 0.07354702359985911, "grad_norm": 1.146367299387529, "learning_rate": 1.9900737504745736e-05, "loss": 1.0301, "step": 522 }, { "epoch": 0.07368791828108488, "grad_norm": 1.2717206296140935, "learning_rate": 1.990009506308972e-05, "loss": 0.9651, "step": 523 }, { "epoch": 0.07382881296231067, "grad_norm": 1.019900986161626, "learning_rate": 1.989945055958222e-05, "loss": 0.9586, "step": 524 }, { "epoch": 0.07396970764353646, "grad_norm": 1.1695558721450203, "learning_rate": 1.989880399435747e-05, "loss": 0.9956, "step": 525 }, { "epoch": 0.07411060232476224, "grad_norm": 1.1726519371373056, "learning_rate": 1.989815536755012e-05, "loss": 0.9767, "step": 526 }, { "epoch": 0.07425149700598803, "grad_norm": 1.1477667891317416, "learning_rate": 1.9897504679295258e-05, "loss": 1.0347, "step": 527 }, { "epoch": 0.0743923916872138, "grad_norm": 1.1552378461718944, "learning_rate": 1.9896851929728403e-05, "loss": 1.0154, "step": 528 }, { "epoch": 0.07453328636843959, "grad_norm": 1.2832350317059515, "learning_rate": 1.98961971189855e-05, "loss": 0.4884, "step": 529 }, { "epoch": 0.07467418104966538, "grad_norm": 1.048045108267807, "learning_rate": 1.9895540247202922e-05, "loss": 1.0255, "step": 530 }, { "epoch": 0.07481507573089116, "grad_norm": 1.077366304735614, "learning_rate": 1.9894881314517475e-05, "loss": 0.948, "step": 531 }, { "epoch": 0.07495597041211695, "grad_norm": 1.3292578225910516, "learning_rate": 1.989422032106639e-05, "loss": 0.5947, "step": 532 }, { "epoch": 0.07509686509334272, "grad_norm": 1.0350974131650434, "learning_rate": 1.9893557266987334e-05, "loss": 1.0013, "step": 533 }, { "epoch": 0.07523775977456851, "grad_norm": 1.0642425903041763, "learning_rate": 1.989289215241839e-05, "loss": 0.9171, "step": 534 }, { "epoch": 0.0753786544557943, "grad_norm": 1.1311438080941507, "learning_rate": 1.9892224977498086e-05, "loss": 0.93, "step": 535 }, { "epoch": 0.07551954913702008, "grad_norm": 1.4177358557537125, "learning_rate": 1.989155574236537e-05, "loss": 0.5593, "step": 536 }, { "epoch": 0.07566044381824587, "grad_norm": 1.3542214499400704, "learning_rate": 1.989088444715962e-05, "loss": 0.5802, "step": 537 }, { "epoch": 0.07580133849947164, "grad_norm": 1.2812108263642212, "learning_rate": 1.9890211092020644e-05, "loss": 1.0251, "step": 538 }, { "epoch": 0.07594223318069743, "grad_norm": 1.1354341077436492, "learning_rate": 1.9889535677088677e-05, "loss": 0.9133, "step": 539 }, { "epoch": 0.07608312786192321, "grad_norm": 1.1506950598163215, "learning_rate": 1.988885820250439e-05, "loss": 0.9828, "step": 540 }, { "epoch": 0.076224022543149, "grad_norm": 1.4194316996058962, "learning_rate": 1.988817866840887e-05, "loss": 0.5301, "step": 541 }, { "epoch": 0.07636491722437477, "grad_norm": 1.1833222196459754, "learning_rate": 1.988749707494365e-05, "loss": 0.9442, "step": 542 }, { "epoch": 0.07650581190560056, "grad_norm": 1.2009169117528133, "learning_rate": 1.9886813422250674e-05, "loss": 0.9752, "step": 543 }, { "epoch": 0.07664670658682635, "grad_norm": 1.4194738270255987, "learning_rate": 1.988612771047233e-05, "loss": 0.5637, "step": 544 }, { "epoch": 0.07678760126805213, "grad_norm": 1.4076710941933908, "learning_rate": 1.9885439939751424e-05, "loss": 0.5936, "step": 545 }, { "epoch": 0.07692849594927792, "grad_norm": 1.2239037828839232, "learning_rate": 1.9884750110231197e-05, "loss": 1.0359, "step": 546 }, { "epoch": 0.07706939063050369, "grad_norm": 1.0890099864406155, "learning_rate": 1.9884058222055318e-05, "loss": 1.0166, "step": 547 }, { "epoch": 0.07721028531172948, "grad_norm": 1.1034176873653039, "learning_rate": 1.9883364275367882e-05, "loss": 0.9339, "step": 548 }, { "epoch": 0.07735117999295527, "grad_norm": 1.0988293813268084, "learning_rate": 1.9882668270313415e-05, "loss": 0.9863, "step": 549 }, { "epoch": 0.07749207467418105, "grad_norm": 1.0377067519045007, "learning_rate": 1.988197020703687e-05, "loss": 0.9824, "step": 550 }, { "epoch": 0.07763296935540684, "grad_norm": 1.142141842617782, "learning_rate": 1.988127008568363e-05, "loss": 0.9996, "step": 551 }, { "epoch": 0.07777386403663261, "grad_norm": 1.1645698315402842, "learning_rate": 1.9880567906399508e-05, "loss": 0.4448, "step": 552 }, { "epoch": 0.0779147587178584, "grad_norm": 1.3641600594646255, "learning_rate": 1.9879863669330742e-05, "loss": 0.613, "step": 553 }, { "epoch": 0.07805565339908418, "grad_norm": 1.3847672060197695, "learning_rate": 1.9879157374624004e-05, "loss": 0.6603, "step": 554 }, { "epoch": 0.07819654808030997, "grad_norm": 1.209907674519828, "learning_rate": 1.9878449022426387e-05, "loss": 0.9986, "step": 555 }, { "epoch": 0.07833744276153576, "grad_norm": 1.1310909383237495, "learning_rate": 1.987773861288542e-05, "loss": 0.9333, "step": 556 }, { "epoch": 0.07847833744276153, "grad_norm": 1.2417293251980512, "learning_rate": 1.987702614614905e-05, "loss": 0.427, "step": 557 }, { "epoch": 0.07861923212398732, "grad_norm": 1.09564097289647, "learning_rate": 1.9876311622365666e-05, "loss": 0.9623, "step": 558 }, { "epoch": 0.0787601268052131, "grad_norm": 1.1192677178511548, "learning_rate": 1.987559504168408e-05, "loss": 0.9948, "step": 559 }, { "epoch": 0.07890102148643889, "grad_norm": 1.145293997534607, "learning_rate": 1.9874876404253528e-05, "loss": 0.9665, "step": 560 }, { "epoch": 0.07904191616766468, "grad_norm": 1.1793378141678004, "learning_rate": 1.9874155710223676e-05, "loss": 0.4941, "step": 561 }, { "epoch": 0.07918281084889045, "grad_norm": 1.3133075376595775, "learning_rate": 1.9873432959744626e-05, "loss": 1.0286, "step": 562 }, { "epoch": 0.07932370553011624, "grad_norm": 1.3852658121858228, "learning_rate": 1.9872708152966895e-05, "loss": 0.5975, "step": 563 }, { "epoch": 0.07946460021134202, "grad_norm": 1.2294782965009852, "learning_rate": 1.9871981290041442e-05, "loss": 0.5366, "step": 564 }, { "epoch": 0.07960549489256781, "grad_norm": 1.0578230165140992, "learning_rate": 1.987125237111964e-05, "loss": 0.9333, "step": 565 }, { "epoch": 0.07974638957379358, "grad_norm": 1.0425863084239895, "learning_rate": 1.987052139635331e-05, "loss": 0.967, "step": 566 }, { "epoch": 0.07988728425501937, "grad_norm": 1.1307263728419827, "learning_rate": 1.9869788365894673e-05, "loss": 1.0685, "step": 567 }, { "epoch": 0.08002817893624516, "grad_norm": 1.2168213702881745, "learning_rate": 1.9869053279896407e-05, "loss": 0.4547, "step": 568 }, { "epoch": 0.08016907361747094, "grad_norm": 1.09845715972631, "learning_rate": 1.9868316138511603e-05, "loss": 0.9449, "step": 569 }, { "epoch": 0.08030996829869673, "grad_norm": 1.1471665415441703, "learning_rate": 1.9867576941893777e-05, "loss": 0.9886, "step": 570 }, { "epoch": 0.0804508629799225, "grad_norm": 1.2750473215521425, "learning_rate": 1.9866835690196882e-05, "loss": 0.5433, "step": 571 }, { "epoch": 0.08059175766114829, "grad_norm": 1.1047288119740553, "learning_rate": 1.98660923835753e-05, "loss": 0.9069, "step": 572 }, { "epoch": 0.08073265234237408, "grad_norm": 1.0103710265003172, "learning_rate": 1.9865347022183826e-05, "loss": 0.9629, "step": 573 }, { "epoch": 0.08087354702359986, "grad_norm": 1.2208018137632433, "learning_rate": 1.9864599606177702e-05, "loss": 0.9971, "step": 574 }, { "epoch": 0.08101444170482565, "grad_norm": 1.3249814146087193, "learning_rate": 1.9863850135712586e-05, "loss": 0.4693, "step": 575 }, { "epoch": 0.08115533638605142, "grad_norm": 1.2282018989294217, "learning_rate": 1.9863098610944567e-05, "loss": 0.9831, "step": 576 }, { "epoch": 0.08129623106727721, "grad_norm": 1.03819158607372, "learning_rate": 1.9862345032030162e-05, "loss": 0.9751, "step": 577 }, { "epoch": 0.081437125748503, "grad_norm": 0.9992896293457979, "learning_rate": 1.9861589399126315e-05, "loss": 0.9487, "step": 578 }, { "epoch": 0.08157802042972878, "grad_norm": 1.2741778478526589, "learning_rate": 1.9860831712390402e-05, "loss": 0.4215, "step": 579 }, { "epoch": 0.08171891511095457, "grad_norm": 1.240395076010389, "learning_rate": 1.9860071971980218e-05, "loss": 0.9324, "step": 580 }, { "epoch": 0.08185980979218034, "grad_norm": 1.1323961014443942, "learning_rate": 1.9859310178053995e-05, "loss": 0.8952, "step": 581 }, { "epoch": 0.08200070447340613, "grad_norm": 1.503388484694774, "learning_rate": 1.9858546330770383e-05, "loss": 0.6699, "step": 582 }, { "epoch": 0.08214159915463191, "grad_norm": 1.3640191339867365, "learning_rate": 1.9857780430288475e-05, "loss": 0.5934, "step": 583 }, { "epoch": 0.0822824938358577, "grad_norm": 1.0537429615824963, "learning_rate": 1.9857012476767774e-05, "loss": 0.9775, "step": 584 }, { "epoch": 0.08242338851708347, "grad_norm": 1.2562986511292134, "learning_rate": 1.9856242470368224e-05, "loss": 0.5744, "step": 585 }, { "epoch": 0.08256428319830926, "grad_norm": 1.2839974693683993, "learning_rate": 1.985547041125018e-05, "loss": 0.4766, "step": 586 }, { "epoch": 0.08270517787953505, "grad_norm": 1.3743974886662869, "learning_rate": 1.9854696299574453e-05, "loss": 0.6457, "step": 587 }, { "epoch": 0.08284607256076083, "grad_norm": 1.2024313673727496, "learning_rate": 1.985392013550225e-05, "loss": 0.4712, "step": 588 }, { "epoch": 0.08298696724198662, "grad_norm": 1.6199761961636314, "learning_rate": 1.9853141919195225e-05, "loss": 0.9678, "step": 589 }, { "epoch": 0.08312786192321239, "grad_norm": 1.0672212208327503, "learning_rate": 1.985236165081545e-05, "loss": 0.993, "step": 590 }, { "epoch": 0.08326875660443818, "grad_norm": 1.113973092626259, "learning_rate": 1.9851579330525433e-05, "loss": 1.0236, "step": 591 }, { "epoch": 0.08340965128566397, "grad_norm": 1.065276244358266, "learning_rate": 1.9850794958488104e-05, "loss": 0.9518, "step": 592 }, { "epoch": 0.08355054596688975, "grad_norm": 1.0122319881634214, "learning_rate": 1.9850008534866818e-05, "loss": 0.9455, "step": 593 }, { "epoch": 0.08369144064811554, "grad_norm": 1.623489560257352, "learning_rate": 1.9849220059825362e-05, "loss": 0.5552, "step": 594 }, { "epoch": 0.08383233532934131, "grad_norm": 1.389522285680972, "learning_rate": 1.9848429533527948e-05, "loss": 0.4514, "step": 595 }, { "epoch": 0.0839732300105671, "grad_norm": 1.1455737883494397, "learning_rate": 1.9847636956139215e-05, "loss": 1.0179, "step": 596 }, { "epoch": 0.08411412469179289, "grad_norm": 1.1301120845505421, "learning_rate": 1.984684232782423e-05, "loss": 0.9803, "step": 597 }, { "epoch": 0.08425501937301867, "grad_norm": 1.279797983279225, "learning_rate": 1.984604564874849e-05, "loss": 0.4874, "step": 598 }, { "epoch": 0.08439591405424446, "grad_norm": 1.0833181368380458, "learning_rate": 1.984524691907791e-05, "loss": 0.9748, "step": 599 }, { "epoch": 0.08453680873547023, "grad_norm": 1.3024482672591924, "learning_rate": 1.9844446138978847e-05, "loss": 0.5994, "step": 600 }, { "epoch": 0.08467770341669602, "grad_norm": 1.107872630825833, "learning_rate": 1.984364330861807e-05, "loss": 1.019, "step": 601 }, { "epoch": 0.0848185980979218, "grad_norm": 1.0537895802405512, "learning_rate": 1.9842838428162774e-05, "loss": 0.9343, "step": 602 }, { "epoch": 0.08495949277914759, "grad_norm": 1.158553123395354, "learning_rate": 1.98420314977806e-05, "loss": 0.9667, "step": 603 }, { "epoch": 0.08510038746037336, "grad_norm": 1.0658846717500603, "learning_rate": 1.98412225176396e-05, "loss": 0.9434, "step": 604 }, { "epoch": 0.08524128214159915, "grad_norm": 1.0827266418491366, "learning_rate": 1.984041148790826e-05, "loss": 1.015, "step": 605 }, { "epoch": 0.08538217682282494, "grad_norm": 1.121923507195813, "learning_rate": 1.9839598408755485e-05, "loss": 0.957, "step": 606 }, { "epoch": 0.08552307150405072, "grad_norm": 1.5365209840451042, "learning_rate": 1.9838783280350613e-05, "loss": 0.8917, "step": 607 }, { "epoch": 0.08566396618527651, "grad_norm": 1.4225433814239639, "learning_rate": 1.9837966102863406e-05, "loss": 0.6467, "step": 608 }, { "epoch": 0.08580486086650228, "grad_norm": 1.2985768210948763, "learning_rate": 1.983714687646406e-05, "loss": 1.0117, "step": 609 }, { "epoch": 0.08594575554772807, "grad_norm": 1.2243084553057175, "learning_rate": 1.983632560132318e-05, "loss": 0.9485, "step": 610 }, { "epoch": 0.08608665022895386, "grad_norm": 1.1836484142167172, "learning_rate": 1.9835502277611822e-05, "loss": 0.5022, "step": 611 }, { "epoch": 0.08622754491017964, "grad_norm": 1.2176129199508394, "learning_rate": 1.9834676905501452e-05, "loss": 0.9455, "step": 612 }, { "epoch": 0.08636843959140543, "grad_norm": 1.332177502796536, "learning_rate": 1.9833849485163962e-05, "loss": 0.5253, "step": 613 }, { "epoch": 0.0865093342726312, "grad_norm": 1.1870895919714874, "learning_rate": 1.9833020016771685e-05, "loss": 0.9617, "step": 614 }, { "epoch": 0.08665022895385699, "grad_norm": 1.3141136385020364, "learning_rate": 1.9832188500497362e-05, "loss": 0.991, "step": 615 }, { "epoch": 0.08679112363508278, "grad_norm": 1.4297440664622338, "learning_rate": 1.9831354936514174e-05, "loss": 0.6436, "step": 616 }, { "epoch": 0.08693201831630856, "grad_norm": 1.0504878219947607, "learning_rate": 1.983051932499572e-05, "loss": 0.9761, "step": 617 }, { "epoch": 0.08707291299753435, "grad_norm": 1.0301382051046537, "learning_rate": 1.9829681666116036e-05, "loss": 0.9861, "step": 618 }, { "epoch": 0.08721380767876012, "grad_norm": 1.0306252449195001, "learning_rate": 1.9828841960049573e-05, "loss": 0.9998, "step": 619 }, { "epoch": 0.08735470235998591, "grad_norm": 1.28100859286289, "learning_rate": 1.982800020697121e-05, "loss": 1.0327, "step": 620 }, { "epoch": 0.0874955970412117, "grad_norm": 1.1859528869002225, "learning_rate": 1.9827156407056264e-05, "loss": 1.0073, "step": 621 }, { "epoch": 0.08763649172243748, "grad_norm": 1.462218628605847, "learning_rate": 1.9826310560480465e-05, "loss": 0.5249, "step": 622 }, { "epoch": 0.08777738640366327, "grad_norm": 1.1397585657716718, "learning_rate": 1.982546266741997e-05, "loss": 0.9485, "step": 623 }, { "epoch": 0.08791828108488904, "grad_norm": 1.1561414501519525, "learning_rate": 1.9824612728051377e-05, "loss": 0.9918, "step": 624 }, { "epoch": 0.08805917576611483, "grad_norm": 1.1197098444653768, "learning_rate": 1.9823760742551686e-05, "loss": 0.9321, "step": 625 }, { "epoch": 0.08820007044734061, "grad_norm": 1.384622644580624, "learning_rate": 1.9822906711098345e-05, "loss": 0.5844, "step": 626 }, { "epoch": 0.0883409651285664, "grad_norm": 1.2775651192391972, "learning_rate": 1.9822050633869216e-05, "loss": 1.0381, "step": 627 }, { "epoch": 0.08848185980979217, "grad_norm": 1.0809490980732603, "learning_rate": 1.9821192511042594e-05, "loss": 0.9822, "step": 628 }, { "epoch": 0.08862275449101796, "grad_norm": 1.4805903406042582, "learning_rate": 1.9820332342797193e-05, "loss": 0.5121, "step": 629 }, { "epoch": 0.08876364917224375, "grad_norm": 1.1708276007094294, "learning_rate": 1.9819470129312156e-05, "loss": 0.91, "step": 630 }, { "epoch": 0.08890454385346953, "grad_norm": 1.1077324539206754, "learning_rate": 1.9818605870767062e-05, "loss": 0.9547, "step": 631 }, { "epoch": 0.08904543853469532, "grad_norm": 1.0849408611322329, "learning_rate": 1.9817739567341894e-05, "loss": 0.9822, "step": 632 }, { "epoch": 0.0891863332159211, "grad_norm": 1.2663853114925523, "learning_rate": 1.981687121921708e-05, "loss": 0.925, "step": 633 }, { "epoch": 0.08932722789714688, "grad_norm": 1.0444079063570288, "learning_rate": 1.9816000826573467e-05, "loss": 0.9956, "step": 634 }, { "epoch": 0.08946812257837267, "grad_norm": 1.3551538870180762, "learning_rate": 1.9815128389592326e-05, "loss": 0.568, "step": 635 }, { "epoch": 0.08960901725959845, "grad_norm": 1.3134596142584571, "learning_rate": 1.9814253908455358e-05, "loss": 0.5144, "step": 636 }, { "epoch": 0.08974991194082424, "grad_norm": 1.2719176261901508, "learning_rate": 1.981337738334469e-05, "loss": 0.4857, "step": 637 }, { "epoch": 0.08989080662205001, "grad_norm": 1.015039744871252, "learning_rate": 1.981249881444286e-05, "loss": 0.9186, "step": 638 }, { "epoch": 0.0900317013032758, "grad_norm": 1.0715179048725756, "learning_rate": 1.981161820193286e-05, "loss": 0.9498, "step": 639 }, { "epoch": 0.09017259598450159, "grad_norm": 1.2681640039177944, "learning_rate": 1.9810735545998077e-05, "loss": 0.5172, "step": 640 }, { "epoch": 0.09031349066572737, "grad_norm": 1.0825117375473343, "learning_rate": 1.9809850846822353e-05, "loss": 0.9115, "step": 641 }, { "epoch": 0.09045438534695316, "grad_norm": 1.0265969247654323, "learning_rate": 1.980896410458993e-05, "loss": 0.9555, "step": 642 }, { "epoch": 0.09059528002817893, "grad_norm": 1.0259920700086402, "learning_rate": 1.9808075319485485e-05, "loss": 0.9641, "step": 643 }, { "epoch": 0.09073617470940472, "grad_norm": 1.0957974594983815, "learning_rate": 1.980718449169413e-05, "loss": 0.9831, "step": 644 }, { "epoch": 0.0908770693906305, "grad_norm": 1.1827838457644242, "learning_rate": 1.9806291621401387e-05, "loss": 0.9847, "step": 645 }, { "epoch": 0.09101796407185629, "grad_norm": 1.2344447058342387, "learning_rate": 1.9805396708793214e-05, "loss": 0.4667, "step": 646 }, { "epoch": 0.09115885875308206, "grad_norm": 1.263780409973115, "learning_rate": 1.980449975405599e-05, "loss": 0.9491, "step": 647 }, { "epoch": 0.09129975343430785, "grad_norm": 1.0856010688477562, "learning_rate": 1.980360075737652e-05, "loss": 0.9808, "step": 648 }, { "epoch": 0.09144064811553364, "grad_norm": 1.095094272496811, "learning_rate": 1.9802699718942033e-05, "loss": 0.9859, "step": 649 }, { "epoch": 0.09158154279675942, "grad_norm": 1.2763115419427353, "learning_rate": 1.9801796638940184e-05, "loss": 0.4674, "step": 650 }, { "epoch": 0.09172243747798521, "grad_norm": 1.180273683531674, "learning_rate": 1.9800891517559056e-05, "loss": 1.0119, "step": 651 }, { "epoch": 0.09186333215921098, "grad_norm": 1.1912814655668251, "learning_rate": 1.9799984354987158e-05, "loss": 1.0175, "step": 652 }, { "epoch": 0.09200422684043677, "grad_norm": 1.17359246240875, "learning_rate": 1.979907515141341e-05, "loss": 0.9392, "step": 653 }, { "epoch": 0.09214512152166256, "grad_norm": 1.0408777696463238, "learning_rate": 1.9798163907027178e-05, "loss": 0.9102, "step": 654 }, { "epoch": 0.09228601620288834, "grad_norm": 1.0678759115010124, "learning_rate": 1.979725062201824e-05, "loss": 0.962, "step": 655 }, { "epoch": 0.09242691088411413, "grad_norm": 1.1278805032220802, "learning_rate": 1.9796335296576804e-05, "loss": 0.9759, "step": 656 }, { "epoch": 0.0925678055653399, "grad_norm": 1.2761319738908505, "learning_rate": 1.9795417930893495e-05, "loss": 1.0094, "step": 657 }, { "epoch": 0.09270870024656569, "grad_norm": 1.305710546427972, "learning_rate": 1.9794498525159373e-05, "loss": 0.5251, "step": 658 }, { "epoch": 0.09284959492779148, "grad_norm": 1.0846298127347531, "learning_rate": 1.979357707956592e-05, "loss": 1.0203, "step": 659 }, { "epoch": 0.09299048960901726, "grad_norm": 1.2322750660258366, "learning_rate": 1.9792653594305044e-05, "loss": 0.5019, "step": 660 }, { "epoch": 0.09313138429024305, "grad_norm": 1.0679071038082457, "learning_rate": 1.9791728069569068e-05, "loss": 0.9219, "step": 661 }, { "epoch": 0.09327227897146882, "grad_norm": 1.2098384503120336, "learning_rate": 1.979080050555075e-05, "loss": 0.9623, "step": 662 }, { "epoch": 0.09341317365269461, "grad_norm": 1.225404580483646, "learning_rate": 1.9789870902443274e-05, "loss": 0.9438, "step": 663 }, { "epoch": 0.0935540683339204, "grad_norm": 1.1187400512742947, "learning_rate": 1.978893926044024e-05, "loss": 0.965, "step": 664 }, { "epoch": 0.09369496301514618, "grad_norm": 1.0649413782926098, "learning_rate": 1.9788005579735676e-05, "loss": 0.9361, "step": 665 }, { "epoch": 0.09383585769637197, "grad_norm": 1.0876947194584485, "learning_rate": 1.978706986052404e-05, "loss": 1.0001, "step": 666 }, { "epoch": 0.09397675237759774, "grad_norm": 1.234424507445515, "learning_rate": 1.978613210300021e-05, "loss": 1.0088, "step": 667 }, { "epoch": 0.09411764705882353, "grad_norm": 1.2027836809407444, "learning_rate": 1.9785192307359488e-05, "loss": 1.0039, "step": 668 }, { "epoch": 0.09425854174004931, "grad_norm": 1.346839648269879, "learning_rate": 1.97842504737976e-05, "loss": 0.5462, "step": 669 }, { "epoch": 0.0943994364212751, "grad_norm": 1.3207854554884662, "learning_rate": 1.9783306602510697e-05, "loss": 0.9628, "step": 670 }, { "epoch": 0.09454033110250087, "grad_norm": 1.323871116386629, "learning_rate": 1.9782360693695358e-05, "loss": 0.5612, "step": 671 }, { "epoch": 0.09468122578372666, "grad_norm": 1.134320950263989, "learning_rate": 1.9781412747548585e-05, "loss": 0.9598, "step": 672 }, { "epoch": 0.09482212046495245, "grad_norm": 1.19651692917626, "learning_rate": 1.9780462764267797e-05, "loss": 0.9883, "step": 673 }, { "epoch": 0.09496301514617823, "grad_norm": 1.3363192468388543, "learning_rate": 1.9779510744050844e-05, "loss": 0.5953, "step": 674 }, { "epoch": 0.09510390982740402, "grad_norm": 1.0375170657819761, "learning_rate": 1.9778556687096007e-05, "loss": 0.9526, "step": 675 }, { "epoch": 0.0952448045086298, "grad_norm": 1.1767346525172646, "learning_rate": 1.9777600593601972e-05, "loss": 0.9504, "step": 676 }, { "epoch": 0.09538569918985558, "grad_norm": 1.0753706971465375, "learning_rate": 1.977664246376787e-05, "loss": 1.0006, "step": 677 }, { "epoch": 0.09552659387108137, "grad_norm": 1.1349821088335788, "learning_rate": 1.9775682297793245e-05, "loss": 0.9924, "step": 678 }, { "epoch": 0.09566748855230715, "grad_norm": 1.1564189080939016, "learning_rate": 1.977472009587806e-05, "loss": 0.9443, "step": 679 }, { "epoch": 0.09580838323353294, "grad_norm": 1.1368933084497193, "learning_rate": 1.9773755858222717e-05, "loss": 1.0178, "step": 680 }, { "epoch": 0.09594927791475871, "grad_norm": 1.034917353344891, "learning_rate": 1.977278958502803e-05, "loss": 0.9445, "step": 681 }, { "epoch": 0.0960901725959845, "grad_norm": 1.1399328383735075, "learning_rate": 1.977182127649524e-05, "loss": 0.9104, "step": 682 }, { "epoch": 0.09623106727721029, "grad_norm": 0.9973303489018454, "learning_rate": 1.9770850932826015e-05, "loss": 0.9572, "step": 683 }, { "epoch": 0.09637196195843607, "grad_norm": 1.3566534758425342, "learning_rate": 1.976987855422244e-05, "loss": 0.589, "step": 684 }, { "epoch": 0.09651285663966186, "grad_norm": 1.180623309417536, "learning_rate": 1.9768904140887035e-05, "loss": 1.021, "step": 685 }, { "epoch": 0.09665375132088763, "grad_norm": 1.157168059219168, "learning_rate": 1.976792769302273e-05, "loss": 1.0097, "step": 686 }, { "epoch": 0.09679464600211342, "grad_norm": 1.258808950921858, "learning_rate": 1.976694921083289e-05, "loss": 0.5053, "step": 687 }, { "epoch": 0.0969355406833392, "grad_norm": 1.054361613868129, "learning_rate": 1.9765968694521303e-05, "loss": 0.9964, "step": 688 }, { "epoch": 0.09707643536456499, "grad_norm": 1.1256878724005719, "learning_rate": 1.9764986144292167e-05, "loss": 0.9485, "step": 689 }, { "epoch": 0.09721733004579076, "grad_norm": 1.0674850694678928, "learning_rate": 1.976400156035012e-05, "loss": 0.907, "step": 690 }, { "epoch": 0.09735822472701655, "grad_norm": 1.2844265055293955, "learning_rate": 1.976301494290022e-05, "loss": 0.4819, "step": 691 }, { "epoch": 0.09749911940824234, "grad_norm": 1.0235545815946188, "learning_rate": 1.9762026292147938e-05, "loss": 0.9384, "step": 692 }, { "epoch": 0.09764001408946812, "grad_norm": 1.2161276701320187, "learning_rate": 1.976103560829918e-05, "loss": 0.5251, "step": 693 }, { "epoch": 0.09778090877069391, "grad_norm": 1.0890549819723319, "learning_rate": 1.9760042891560275e-05, "loss": 0.9451, "step": 694 }, { "epoch": 0.09792180345191968, "grad_norm": 1.2714979307178225, "learning_rate": 1.9759048142137966e-05, "loss": 0.9927, "step": 695 }, { "epoch": 0.09806269813314547, "grad_norm": 1.071902847079122, "learning_rate": 1.975805136023943e-05, "loss": 0.9268, "step": 696 }, { "epoch": 0.09820359281437126, "grad_norm": 1.0665931174462493, "learning_rate": 1.9757052546072258e-05, "loss": 0.9758, "step": 697 }, { "epoch": 0.09834448749559704, "grad_norm": 1.333439372694271, "learning_rate": 1.9756051699844474e-05, "loss": 0.535, "step": 698 }, { "epoch": 0.09848538217682283, "grad_norm": 1.4434910961297307, "learning_rate": 1.9755048821764516e-05, "loss": 0.4824, "step": 699 }, { "epoch": 0.0986262768580486, "grad_norm": 1.3225023877596236, "learning_rate": 1.9754043912041254e-05, "loss": 0.5191, "step": 700 }, { "epoch": 0.09876717153927439, "grad_norm": 1.3572023113519833, "learning_rate": 1.975303697088397e-05, "loss": 0.5436, "step": 701 }, { "epoch": 0.09890806622050018, "grad_norm": 1.2171192301746823, "learning_rate": 1.9752027998502378e-05, "loss": 0.9818, "step": 702 }, { "epoch": 0.09904896090172596, "grad_norm": 1.0549970797956296, "learning_rate": 1.9751016995106617e-05, "loss": 0.9613, "step": 703 }, { "epoch": 0.09918985558295175, "grad_norm": 1.0832467612000458, "learning_rate": 1.9750003960907237e-05, "loss": 1.0011, "step": 704 }, { "epoch": 0.09933075026417752, "grad_norm": 1.2311097437112193, "learning_rate": 1.974898889611522e-05, "loss": 0.9722, "step": 705 }, { "epoch": 0.09947164494540331, "grad_norm": 1.5621060892236724, "learning_rate": 1.9747971800941977e-05, "loss": 0.5438, "step": 706 }, { "epoch": 0.0996125396266291, "grad_norm": 1.2558458836440318, "learning_rate": 1.9746952675599328e-05, "loss": 0.9814, "step": 707 }, { "epoch": 0.09975343430785488, "grad_norm": 1.1087834562514371, "learning_rate": 1.9745931520299517e-05, "loss": 0.9571, "step": 708 }, { "epoch": 0.09989432898908067, "grad_norm": 1.1706316471023734, "learning_rate": 1.9744908335255224e-05, "loss": 0.9532, "step": 709 }, { "epoch": 0.10003522367030644, "grad_norm": 0.9907900202465132, "learning_rate": 1.974388312067954e-05, "loss": 0.9618, "step": 710 }, { "epoch": 0.10017611835153223, "grad_norm": 1.1627763389992125, "learning_rate": 1.9742855876785983e-05, "loss": 0.9122, "step": 711 }, { "epoch": 0.10031701303275802, "grad_norm": 1.2900584584689212, "learning_rate": 1.9741826603788496e-05, "loss": 0.9587, "step": 712 }, { "epoch": 0.1004579077139838, "grad_norm": 1.6394965609253682, "learning_rate": 1.9740795301901433e-05, "loss": 0.5738, "step": 713 }, { "epoch": 0.10059880239520957, "grad_norm": 1.073795407023253, "learning_rate": 1.9739761971339586e-05, "loss": 0.9361, "step": 714 }, { "epoch": 0.10073969707643536, "grad_norm": 1.177873000784913, "learning_rate": 1.973872661231816e-05, "loss": 0.9325, "step": 715 }, { "epoch": 0.10088059175766115, "grad_norm": 1.3076070828293824, "learning_rate": 1.9737689225052787e-05, "loss": 0.5527, "step": 716 }, { "epoch": 0.10102148643888693, "grad_norm": 1.058651181575819, "learning_rate": 1.9736649809759517e-05, "loss": 1.0073, "step": 717 }, { "epoch": 0.10116238112011272, "grad_norm": 1.1356518038324694, "learning_rate": 1.9735608366654823e-05, "loss": 0.497, "step": 718 }, { "epoch": 0.1013032758013385, "grad_norm": 1.2171293022171061, "learning_rate": 1.973456489595561e-05, "loss": 0.9881, "step": 719 }, { "epoch": 0.10144417048256428, "grad_norm": 1.2116706295830073, "learning_rate": 1.9733519397879184e-05, "loss": 0.9438, "step": 720 }, { "epoch": 0.10158506516379007, "grad_norm": 1.2105950098174436, "learning_rate": 1.97324718726433e-05, "loss": 0.8829, "step": 721 }, { "epoch": 0.10172595984501585, "grad_norm": 1.5527468003286105, "learning_rate": 1.9731422320466112e-05, "loss": 0.7235, "step": 722 }, { "epoch": 0.10186685452624164, "grad_norm": 1.0553648199744257, "learning_rate": 1.9730370741566213e-05, "loss": 0.9517, "step": 723 }, { "epoch": 0.10200774920746741, "grad_norm": 1.1367336457584698, "learning_rate": 1.9729317136162604e-05, "loss": 0.9521, "step": 724 }, { "epoch": 0.1021486438886932, "grad_norm": 1.2265325543303207, "learning_rate": 1.972826150447472e-05, "loss": 0.9736, "step": 725 }, { "epoch": 0.10228953856991899, "grad_norm": 1.1062028555719752, "learning_rate": 1.9727203846722413e-05, "loss": 0.9635, "step": 726 }, { "epoch": 0.10243043325114477, "grad_norm": 1.1269540813111543, "learning_rate": 1.9726144163125953e-05, "loss": 1.0241, "step": 727 }, { "epoch": 0.10257132793237056, "grad_norm": 1.3530756370622208, "learning_rate": 1.972508245390604e-05, "loss": 0.954, "step": 728 }, { "epoch": 0.10271222261359633, "grad_norm": 1.181352264804922, "learning_rate": 1.972401871928379e-05, "loss": 0.9408, "step": 729 }, { "epoch": 0.10285311729482212, "grad_norm": 1.211492261236408, "learning_rate": 1.972295295948074e-05, "loss": 0.9716, "step": 730 }, { "epoch": 0.1029940119760479, "grad_norm": 1.469742751102422, "learning_rate": 1.972188517471886e-05, "loss": 0.5857, "step": 731 }, { "epoch": 0.10313490665727369, "grad_norm": 1.144503130355045, "learning_rate": 1.9720815365220522e-05, "loss": 1.0005, "step": 732 }, { "epoch": 0.10327580133849946, "grad_norm": 1.202091918837798, "learning_rate": 1.971974353120854e-05, "loss": 1.0087, "step": 733 }, { "epoch": 0.10341669601972525, "grad_norm": 1.0111683528884683, "learning_rate": 1.9718669672906135e-05, "loss": 0.9798, "step": 734 }, { "epoch": 0.10355759070095104, "grad_norm": 0.9932682638866163, "learning_rate": 1.9717593790536954e-05, "loss": 0.9267, "step": 735 }, { "epoch": 0.10369848538217682, "grad_norm": 1.124349185289762, "learning_rate": 1.9716515884325068e-05, "loss": 0.4747, "step": 736 }, { "epoch": 0.10383938006340261, "grad_norm": 1.14140106674472, "learning_rate": 1.9715435954494974e-05, "loss": 0.9455, "step": 737 }, { "epoch": 0.10398027474462838, "grad_norm": 1.0642819775772019, "learning_rate": 1.9714354001271576e-05, "loss": 0.945, "step": 738 }, { "epoch": 0.10412116942585417, "grad_norm": 1.1329514513615888, "learning_rate": 1.971327002488021e-05, "loss": 0.979, "step": 739 }, { "epoch": 0.10426206410707996, "grad_norm": 1.3752940126525235, "learning_rate": 1.9712184025546636e-05, "loss": 0.4761, "step": 740 }, { "epoch": 0.10440295878830574, "grad_norm": 1.0894024282930659, "learning_rate": 1.9711096003497027e-05, "loss": 0.9986, "step": 741 }, { "epoch": 0.10454385346953153, "grad_norm": 1.0848441066093248, "learning_rate": 1.971000595895798e-05, "loss": 0.9608, "step": 742 }, { "epoch": 0.1046847481507573, "grad_norm": 1.1150699001784945, "learning_rate": 1.9708913892156513e-05, "loss": 0.9351, "step": 743 }, { "epoch": 0.10482564283198309, "grad_norm": 1.1389530815390512, "learning_rate": 1.970781980332007e-05, "loss": 0.9845, "step": 744 }, { "epoch": 0.10496653751320888, "grad_norm": 1.3517338455016699, "learning_rate": 1.970672369267651e-05, "loss": 0.4878, "step": 745 }, { "epoch": 0.10510743219443466, "grad_norm": 1.2256526705939927, "learning_rate": 1.9705625560454117e-05, "loss": 1.0212, "step": 746 }, { "epoch": 0.10524832687566045, "grad_norm": 1.2137016319899268, "learning_rate": 1.9704525406881593e-05, "loss": 0.9847, "step": 747 }, { "epoch": 0.10538922155688622, "grad_norm": 1.3471244861780312, "learning_rate": 1.9703423232188056e-05, "loss": 0.9989, "step": 748 }, { "epoch": 0.10553011623811201, "grad_norm": 1.1623105983236077, "learning_rate": 1.9702319036603067e-05, "loss": 0.4595, "step": 749 }, { "epoch": 0.1056710109193378, "grad_norm": 1.099437739951608, "learning_rate": 1.970121282035658e-05, "loss": 0.9616, "step": 750 }, { "epoch": 0.10581190560056358, "grad_norm": 1.3093832811433845, "learning_rate": 1.970010458367898e-05, "loss": 0.5162, "step": 751 }, { "epoch": 0.10595280028178937, "grad_norm": 1.1336031758918115, "learning_rate": 1.9698994326801085e-05, "loss": 0.9489, "step": 752 }, { "epoch": 0.10609369496301514, "grad_norm": 1.3174167087833317, "learning_rate": 1.969788204995412e-05, "loss": 0.6082, "step": 753 }, { "epoch": 0.10623458964424093, "grad_norm": 1.2236130280559807, "learning_rate": 1.9696767753369732e-05, "loss": 1.0553, "step": 754 }, { "epoch": 0.10637548432546672, "grad_norm": 1.134093687348374, "learning_rate": 1.969565143727999e-05, "loss": 0.996, "step": 755 }, { "epoch": 0.1065163790066925, "grad_norm": 1.2013896448740664, "learning_rate": 1.969453310191739e-05, "loss": 0.4579, "step": 756 }, { "epoch": 0.10665727368791827, "grad_norm": 1.0841191726308255, "learning_rate": 1.9693412747514833e-05, "loss": 0.9559, "step": 757 }, { "epoch": 0.10679816836914406, "grad_norm": 1.329669686212773, "learning_rate": 1.969229037430566e-05, "loss": 1.0171, "step": 758 }, { "epoch": 0.10693906305036985, "grad_norm": 1.0888617765927409, "learning_rate": 1.969116598252362e-05, "loss": 0.9519, "step": 759 }, { "epoch": 0.10707995773159563, "grad_norm": 1.3930705601153006, "learning_rate": 1.9690039572402883e-05, "loss": 0.591, "step": 760 }, { "epoch": 0.10722085241282142, "grad_norm": 1.0133426266039662, "learning_rate": 1.9688911144178046e-05, "loss": 0.9684, "step": 761 }, { "epoch": 0.1073617470940472, "grad_norm": 1.3096021925562844, "learning_rate": 1.968778069808412e-05, "loss": 0.5074, "step": 762 }, { "epoch": 0.10750264177527298, "grad_norm": 1.3746506444893123, "learning_rate": 1.9686648234356537e-05, "loss": 0.4891, "step": 763 }, { "epoch": 0.10764353645649877, "grad_norm": 1.050956953521921, "learning_rate": 1.9685513753231152e-05, "loss": 0.9178, "step": 764 }, { "epoch": 0.10778443113772455, "grad_norm": 1.28170840181906, "learning_rate": 1.968437725494424e-05, "loss": 0.5007, "step": 765 }, { "epoch": 0.10792532581895034, "grad_norm": 0.9955094823453181, "learning_rate": 1.9683238739732493e-05, "loss": 0.9383, "step": 766 }, { "epoch": 0.10806622050017611, "grad_norm": 1.3095601300366, "learning_rate": 1.9682098207833022e-05, "loss": 0.5402, "step": 767 }, { "epoch": 0.1082071151814019, "grad_norm": 1.199257266910251, "learning_rate": 1.9680955659483366e-05, "loss": 1.0716, "step": 768 }, { "epoch": 0.10834800986262769, "grad_norm": 1.2439898862242744, "learning_rate": 1.9679811094921478e-05, "loss": 0.5013, "step": 769 }, { "epoch": 0.10848890454385347, "grad_norm": 1.2100938348620098, "learning_rate": 1.9678664514385728e-05, "loss": 0.9718, "step": 770 }, { "epoch": 0.10862979922507926, "grad_norm": 1.1841665296193016, "learning_rate": 1.9677515918114912e-05, "loss": 0.4536, "step": 771 }, { "epoch": 0.10877069390630503, "grad_norm": 1.114113095084908, "learning_rate": 1.9676365306348244e-05, "loss": 0.976, "step": 772 }, { "epoch": 0.10891158858753082, "grad_norm": 1.2094337722993584, "learning_rate": 1.9675212679325354e-05, "loss": 0.51, "step": 773 }, { "epoch": 0.1090524832687566, "grad_norm": 1.2269721223893049, "learning_rate": 1.96740580372863e-05, "loss": 0.951, "step": 774 }, { "epoch": 0.10919337794998239, "grad_norm": 1.1323282033816926, "learning_rate": 1.967290138047155e-05, "loss": 0.9397, "step": 775 }, { "epoch": 0.10933427263120817, "grad_norm": 1.0752425149502265, "learning_rate": 1.9671742709121995e-05, "loss": 0.926, "step": 776 }, { "epoch": 0.10947516731243395, "grad_norm": 1.3949476741126348, "learning_rate": 1.9670582023478952e-05, "loss": 0.5256, "step": 777 }, { "epoch": 0.10961606199365974, "grad_norm": 1.3219385239130692, "learning_rate": 1.966941932378415e-05, "loss": 0.5039, "step": 778 }, { "epoch": 0.10975695667488553, "grad_norm": 1.2323168442051677, "learning_rate": 1.9668254610279737e-05, "loss": 0.4729, "step": 779 }, { "epoch": 0.10989785135611131, "grad_norm": 1.1179163014442797, "learning_rate": 1.9667087883208286e-05, "loss": 0.9784, "step": 780 }, { "epoch": 0.11003874603733708, "grad_norm": 1.354389732725919, "learning_rate": 1.9665919142812787e-05, "loss": 0.5961, "step": 781 }, { "epoch": 0.11017964071856287, "grad_norm": 1.3230593191353834, "learning_rate": 1.9664748389336644e-05, "loss": 0.4657, "step": 782 }, { "epoch": 0.11032053539978866, "grad_norm": 1.3737879932896184, "learning_rate": 1.966357562302369e-05, "loss": 0.5686, "step": 783 }, { "epoch": 0.11046143008101444, "grad_norm": 1.1347955480506986, "learning_rate": 1.966240084411817e-05, "loss": 1.0013, "step": 784 }, { "epoch": 0.11060232476224023, "grad_norm": 1.007517540236813, "learning_rate": 1.9661224052864752e-05, "loss": 0.9733, "step": 785 }, { "epoch": 0.110743219443466, "grad_norm": 0.9954272730864696, "learning_rate": 1.9660045249508518e-05, "loss": 0.9575, "step": 786 }, { "epoch": 0.11088411412469179, "grad_norm": 1.0508796375285872, "learning_rate": 1.9658864434294975e-05, "loss": 0.8886, "step": 787 }, { "epoch": 0.11102500880591758, "grad_norm": 1.1259850602368104, "learning_rate": 1.9657681607470045e-05, "loss": 0.9794, "step": 788 }, { "epoch": 0.11116590348714336, "grad_norm": 1.1174269658944889, "learning_rate": 1.9656496769280073e-05, "loss": 0.9253, "step": 789 }, { "epoch": 0.11130679816836915, "grad_norm": 1.0694934660741284, "learning_rate": 1.9655309919971818e-05, "loss": 0.8998, "step": 790 }, { "epoch": 0.11144769284959492, "grad_norm": 1.1197345921094215, "learning_rate": 1.965412105979246e-05, "loss": 0.9136, "step": 791 }, { "epoch": 0.11158858753082071, "grad_norm": 1.8483409783767866, "learning_rate": 1.9652930188989593e-05, "loss": 0.811, "step": 792 }, { "epoch": 0.1117294822120465, "grad_norm": 1.0170925990994475, "learning_rate": 1.9651737307811246e-05, "loss": 0.928, "step": 793 }, { "epoch": 0.11187037689327228, "grad_norm": 1.367493690659291, "learning_rate": 1.9650542416505852e-05, "loss": 0.6923, "step": 794 }, { "epoch": 0.11201127157449806, "grad_norm": 1.0930960922832893, "learning_rate": 1.964934551532226e-05, "loss": 0.9677, "step": 795 }, { "epoch": 0.11215216625572384, "grad_norm": 1.0584328983644327, "learning_rate": 1.964814660450975e-05, "loss": 0.9804, "step": 796 }, { "epoch": 0.11229306093694963, "grad_norm": 1.1858768121471555, "learning_rate": 1.964694568431801e-05, "loss": 0.9366, "step": 797 }, { "epoch": 0.11243395561817542, "grad_norm": 1.1261664650108947, "learning_rate": 1.964574275499715e-05, "loss": 1.0236, "step": 798 }, { "epoch": 0.1125748502994012, "grad_norm": 1.0154515301636415, "learning_rate": 1.9644537816797702e-05, "loss": 0.9825, "step": 799 }, { "epoch": 0.11271574498062698, "grad_norm": 1.003635169607517, "learning_rate": 1.9643330869970617e-05, "loss": 1.0057, "step": 800 }, { "epoch": 0.11285663966185276, "grad_norm": 0.9809784480629774, "learning_rate": 1.9642121914767252e-05, "loss": 0.9587, "step": 801 }, { "epoch": 0.11299753434307855, "grad_norm": 1.3676526811395198, "learning_rate": 1.9640910951439395e-05, "loss": 0.5218, "step": 802 }, { "epoch": 0.11313842902430434, "grad_norm": 0.9780917988737896, "learning_rate": 1.9639697980239252e-05, "loss": 0.9606, "step": 803 }, { "epoch": 0.11327932370553012, "grad_norm": 0.9835321045291253, "learning_rate": 1.963848300141944e-05, "loss": 0.9584, "step": 804 }, { "epoch": 0.1134202183867559, "grad_norm": 1.1515471103418902, "learning_rate": 1.9637266015232993e-05, "loss": 0.9609, "step": 805 }, { "epoch": 0.11356111306798168, "grad_norm": 1.0154926698645481, "learning_rate": 1.9636047021933374e-05, "loss": 0.9336, "step": 806 }, { "epoch": 0.11370200774920747, "grad_norm": 1.2099378798013143, "learning_rate": 1.963482602177446e-05, "loss": 0.9541, "step": 807 }, { "epoch": 0.11384290243043325, "grad_norm": 1.1643233694944717, "learning_rate": 1.9633603015010538e-05, "loss": 0.9982, "step": 808 }, { "epoch": 0.11398379711165904, "grad_norm": 1.4152082329131366, "learning_rate": 1.963237800189632e-05, "loss": 0.5834, "step": 809 }, { "epoch": 0.11412469179288481, "grad_norm": 1.0717945833751295, "learning_rate": 1.9631150982686937e-05, "loss": 0.9845, "step": 810 }, { "epoch": 0.1142655864741106, "grad_norm": 1.1277233010609866, "learning_rate": 1.9629921957637927e-05, "loss": 0.9551, "step": 811 }, { "epoch": 0.11440648115533639, "grad_norm": 0.9840212759596212, "learning_rate": 1.962869092700526e-05, "loss": 0.9141, "step": 812 }, { "epoch": 0.11454737583656217, "grad_norm": 1.1674346506255686, "learning_rate": 1.9627457891045322e-05, "loss": 0.4511, "step": 813 }, { "epoch": 0.11468827051778796, "grad_norm": 1.2847710761089381, "learning_rate": 1.9626222850014905e-05, "loss": 0.6519, "step": 814 }, { "epoch": 0.11482916519901373, "grad_norm": 1.8338269826600586, "learning_rate": 1.962498580417123e-05, "loss": 0.5799, "step": 815 }, { "epoch": 0.11497005988023952, "grad_norm": 1.1611451636203494, "learning_rate": 1.9623746753771925e-05, "loss": 0.9167, "step": 816 }, { "epoch": 0.1151109545614653, "grad_norm": 1.3083355469997147, "learning_rate": 1.962250569907505e-05, "loss": 0.5299, "step": 817 }, { "epoch": 0.11525184924269109, "grad_norm": 1.0987293027467662, "learning_rate": 1.962126264033907e-05, "loss": 0.9588, "step": 818 }, { "epoch": 0.11539274392391687, "grad_norm": 1.1437186230968643, "learning_rate": 1.9620017577822873e-05, "loss": 0.9642, "step": 819 }, { "epoch": 0.11553363860514265, "grad_norm": 0.9957943145565882, "learning_rate": 1.9618770511785764e-05, "loss": 0.9316, "step": 820 }, { "epoch": 0.11567453328636844, "grad_norm": 1.1744706734307315, "learning_rate": 1.961752144248746e-05, "loss": 0.8971, "step": 821 }, { "epoch": 0.11581542796759423, "grad_norm": 1.374952417770522, "learning_rate": 1.9616270370188104e-05, "loss": 0.599, "step": 822 }, { "epoch": 0.11595632264882001, "grad_norm": 1.4243305374835042, "learning_rate": 1.961501729514825e-05, "loss": 0.5899, "step": 823 }, { "epoch": 0.11609721733004578, "grad_norm": 1.0904073039254616, "learning_rate": 1.9613762217628868e-05, "loss": 0.9443, "step": 824 }, { "epoch": 0.11623811201127157, "grad_norm": 1.4198420235494893, "learning_rate": 1.9612505137891352e-05, "loss": 0.5531, "step": 825 }, { "epoch": 0.11637900669249736, "grad_norm": 1.5510226003850003, "learning_rate": 1.9611246056197507e-05, "loss": 0.4774, "step": 826 }, { "epoch": 0.11651990137372314, "grad_norm": 0.9947240393260481, "learning_rate": 1.9609984972809557e-05, "loss": 0.9407, "step": 827 }, { "epoch": 0.11666079605494893, "grad_norm": 1.2941020067141804, "learning_rate": 1.960872188799014e-05, "loss": 0.8795, "step": 828 }, { "epoch": 0.1168016907361747, "grad_norm": 1.1015871920724778, "learning_rate": 1.960745680200232e-05, "loss": 0.9959, "step": 829 }, { "epoch": 0.11694258541740049, "grad_norm": 1.1139922237838016, "learning_rate": 1.9606189715109563e-05, "loss": 0.9855, "step": 830 }, { "epoch": 0.11708348009862628, "grad_norm": 1.4451695690552542, "learning_rate": 1.9604920627575764e-05, "loss": 0.5964, "step": 831 }, { "epoch": 0.11722437477985206, "grad_norm": 1.385042176393814, "learning_rate": 1.9603649539665233e-05, "loss": 0.6378, "step": 832 }, { "epoch": 0.11736526946107785, "grad_norm": 1.1822148413897164, "learning_rate": 1.9602376451642692e-05, "loss": 0.9398, "step": 833 }, { "epoch": 0.11750616414230362, "grad_norm": 1.2529739088888583, "learning_rate": 1.9601101363773275e-05, "loss": 0.5275, "step": 834 }, { "epoch": 0.11764705882352941, "grad_norm": 1.1667755372476947, "learning_rate": 1.959982427632255e-05, "loss": 0.969, "step": 835 }, { "epoch": 0.1177879535047552, "grad_norm": 1.169090077491052, "learning_rate": 1.959854518955649e-05, "loss": 0.9232, "step": 836 }, { "epoch": 0.11792884818598098, "grad_norm": 1.1800529392363177, "learning_rate": 1.9597264103741473e-05, "loss": 0.9747, "step": 837 }, { "epoch": 0.11806974286720676, "grad_norm": 1.0850369889403872, "learning_rate": 1.9595981019144318e-05, "loss": 0.9932, "step": 838 }, { "epoch": 0.11821063754843254, "grad_norm": 1.0994988341621386, "learning_rate": 1.959469593603224e-05, "loss": 0.9398, "step": 839 }, { "epoch": 0.11835153222965833, "grad_norm": 1.457560276063898, "learning_rate": 1.9593408854672886e-05, "loss": 0.5084, "step": 840 }, { "epoch": 0.11849242691088412, "grad_norm": 1.0431047424876738, "learning_rate": 1.9592119775334302e-05, "loss": 0.954, "step": 841 }, { "epoch": 0.1186333215921099, "grad_norm": 1.4146087178018436, "learning_rate": 1.9590828698284965e-05, "loss": 0.5053, "step": 842 }, { "epoch": 0.11877421627333568, "grad_norm": 1.5305483252459777, "learning_rate": 1.9589535623793763e-05, "loss": 0.5383, "step": 843 }, { "epoch": 0.11891511095456146, "grad_norm": 1.0704108156515382, "learning_rate": 1.9588240552129993e-05, "loss": 0.9843, "step": 844 }, { "epoch": 0.11905600563578725, "grad_norm": 1.1107822457859748, "learning_rate": 1.958694348356338e-05, "loss": 0.9974, "step": 845 }, { "epoch": 0.11919690031701304, "grad_norm": 1.2853909734877993, "learning_rate": 1.9585644418364058e-05, "loss": 0.4705, "step": 846 }, { "epoch": 0.11933779499823882, "grad_norm": 0.985155720904314, "learning_rate": 1.9584343356802576e-05, "loss": 0.929, "step": 847 }, { "epoch": 0.1194786896794646, "grad_norm": 0.981275251999029, "learning_rate": 1.9583040299149897e-05, "loss": 0.9364, "step": 848 }, { "epoch": 0.11961958436069038, "grad_norm": 1.2009586702095496, "learning_rate": 1.9581735245677412e-05, "loss": 0.9985, "step": 849 }, { "epoch": 0.11976047904191617, "grad_norm": 1.0222285737781365, "learning_rate": 1.9580428196656916e-05, "loss": 0.9852, "step": 850 }, { "epoch": 0.11990137372314195, "grad_norm": 1.0582720683475368, "learning_rate": 1.9579119152360623e-05, "loss": 0.8998, "step": 851 }, { "epoch": 0.12004226840436774, "grad_norm": 1.6628990453700834, "learning_rate": 1.9577808113061158e-05, "loss": 0.5356, "step": 852 }, { "epoch": 0.12018316308559351, "grad_norm": 1.0138539139039688, "learning_rate": 1.957649507903157e-05, "loss": 0.8834, "step": 853 }, { "epoch": 0.1203240577668193, "grad_norm": 1.1391872856008982, "learning_rate": 1.9575180050545316e-05, "loss": 0.9465, "step": 854 }, { "epoch": 0.12046495244804509, "grad_norm": 1.3110281182906152, "learning_rate": 1.9573863027876276e-05, "loss": 0.9746, "step": 855 }, { "epoch": 0.12060584712927087, "grad_norm": 1.0309381590392777, "learning_rate": 1.957254401129873e-05, "loss": 0.8797, "step": 856 }, { "epoch": 0.12074674181049666, "grad_norm": 1.3183946912327047, "learning_rate": 1.9571223001087403e-05, "loss": 0.4386, "step": 857 }, { "epoch": 0.12088763649172243, "grad_norm": 1.0823262068166344, "learning_rate": 1.9569899997517398e-05, "loss": 0.952, "step": 858 }, { "epoch": 0.12102853117294822, "grad_norm": 1.3796471131004167, "learning_rate": 1.9568575000864264e-05, "loss": 0.5706, "step": 859 }, { "epoch": 0.121169425854174, "grad_norm": 1.4216770326884063, "learning_rate": 1.9567248011403943e-05, "loss": 0.9197, "step": 860 }, { "epoch": 0.1213103205353998, "grad_norm": 1.2504806781862017, "learning_rate": 1.9565919029412806e-05, "loss": 0.45, "step": 861 }, { "epoch": 0.12145121521662557, "grad_norm": 1.2659198832388838, "learning_rate": 1.9564588055167638e-05, "loss": 0.9775, "step": 862 }, { "epoch": 0.12159210989785135, "grad_norm": 1.387391335681911, "learning_rate": 1.956325508894563e-05, "loss": 0.6859, "step": 863 }, { "epoch": 0.12173300457907714, "grad_norm": 1.0789818771159336, "learning_rate": 1.9561920131024394e-05, "loss": 0.9217, "step": 864 }, { "epoch": 0.12187389926030293, "grad_norm": 1.0789551540556421, "learning_rate": 1.9560583181681957e-05, "loss": 0.9556, "step": 865 }, { "epoch": 0.12201479394152871, "grad_norm": 1.4891640271277913, "learning_rate": 1.955924424119676e-05, "loss": 0.6363, "step": 866 }, { "epoch": 0.12215568862275449, "grad_norm": 0.9699940123092708, "learning_rate": 1.955790330984766e-05, "loss": 0.9532, "step": 867 }, { "epoch": 0.12229658330398027, "grad_norm": 1.0863924729654137, "learning_rate": 1.9556560387913926e-05, "loss": 0.9374, "step": 868 }, { "epoch": 0.12243747798520606, "grad_norm": 1.1857132037706695, "learning_rate": 1.955521547567524e-05, "loss": 1.007, "step": 869 }, { "epoch": 0.12257837266643185, "grad_norm": 1.2179872017174211, "learning_rate": 1.955386857341171e-05, "loss": 0.9171, "step": 870 }, { "epoch": 0.12271926734765763, "grad_norm": 0.9883437830715621, "learning_rate": 1.9552519681403835e-05, "loss": 0.9755, "step": 871 }, { "epoch": 0.1228601620288834, "grad_norm": 1.2056016528558378, "learning_rate": 1.9551168799932558e-05, "loss": 0.5658, "step": 872 }, { "epoch": 0.12300105671010919, "grad_norm": 1.0349603811260213, "learning_rate": 1.954981592927921e-05, "loss": 0.8657, "step": 873 }, { "epoch": 0.12314195139133498, "grad_norm": 1.0933512966496965, "learning_rate": 1.954846106972556e-05, "loss": 0.922, "step": 874 }, { "epoch": 0.12328284607256076, "grad_norm": 1.2370358957015042, "learning_rate": 1.954710422155377e-05, "loss": 0.4952, "step": 875 }, { "epoch": 0.12342374075378655, "grad_norm": 1.418854922559045, "learning_rate": 1.954574538504642e-05, "loss": 0.6168, "step": 876 }, { "epoch": 0.12356463543501232, "grad_norm": 1.1164502208316462, "learning_rate": 1.9544384560486524e-05, "loss": 0.9472, "step": 877 }, { "epoch": 0.12370553011623811, "grad_norm": 1.1540551349936323, "learning_rate": 1.9543021748157483e-05, "loss": 0.9502, "step": 878 }, { "epoch": 0.1238464247974639, "grad_norm": 1.2238034759829182, "learning_rate": 1.9541656948343132e-05, "loss": 0.9636, "step": 879 }, { "epoch": 0.12398731947868968, "grad_norm": 1.274563658962095, "learning_rate": 1.9540290161327704e-05, "loss": 0.4972, "step": 880 }, { "epoch": 0.12412821415991546, "grad_norm": 1.0042710629072815, "learning_rate": 1.9538921387395863e-05, "loss": 0.9378, "step": 881 }, { "epoch": 0.12426910884114124, "grad_norm": 1.0490404878248545, "learning_rate": 1.9537550626832673e-05, "loss": 0.9689, "step": 882 }, { "epoch": 0.12441000352236703, "grad_norm": 1.0910740196748019, "learning_rate": 1.953617787992361e-05, "loss": 0.9679, "step": 883 }, { "epoch": 0.12455089820359282, "grad_norm": 1.1722965487594228, "learning_rate": 1.9534803146954585e-05, "loss": 0.9949, "step": 884 }, { "epoch": 0.1246917928848186, "grad_norm": 1.0070162911176548, "learning_rate": 1.9533426428211902e-05, "loss": 0.9229, "step": 885 }, { "epoch": 0.12483268756604438, "grad_norm": 1.4968004848995666, "learning_rate": 1.953204772398228e-05, "loss": 0.5271, "step": 886 }, { "epoch": 0.12497358224727016, "grad_norm": 1.1725770461894869, "learning_rate": 1.9530667034552858e-05, "loss": 1.0029, "step": 887 }, { "epoch": 0.12511447692849595, "grad_norm": 1.0678086152718163, "learning_rate": 1.952928436021119e-05, "loss": 0.9492, "step": 888 }, { "epoch": 0.12525537160972172, "grad_norm": 1.0693458025609703, "learning_rate": 1.952789970124523e-05, "loss": 0.9172, "step": 889 }, { "epoch": 0.12539626629094752, "grad_norm": 1.424179587358167, "learning_rate": 1.952651305794337e-05, "loss": 0.5506, "step": 890 }, { "epoch": 0.1255371609721733, "grad_norm": 1.0125856758551666, "learning_rate": 1.952512443059439e-05, "loss": 0.8915, "step": 891 }, { "epoch": 0.1256780556533991, "grad_norm": 1.1573468556681372, "learning_rate": 1.9523733819487494e-05, "loss": 0.9857, "step": 892 }, { "epoch": 0.12581895033462487, "grad_norm": 1.4950970097255227, "learning_rate": 1.9522341224912306e-05, "loss": 0.5311, "step": 893 }, { "epoch": 0.12595984501585064, "grad_norm": 1.1107392603498696, "learning_rate": 1.9520946647158844e-05, "loss": 1.0258, "step": 894 }, { "epoch": 0.12610073969707644, "grad_norm": 1.3250394792334497, "learning_rate": 1.9519550086517563e-05, "loss": 0.4026, "step": 895 }, { "epoch": 0.12624163437830221, "grad_norm": 1.1052886034149325, "learning_rate": 1.951815154327931e-05, "loss": 0.9402, "step": 896 }, { "epoch": 0.12638252905952801, "grad_norm": 1.1664066504811486, "learning_rate": 1.9516751017735364e-05, "loss": 0.4354, "step": 897 }, { "epoch": 0.1265234237407538, "grad_norm": 1.1248801973957419, "learning_rate": 1.951534851017739e-05, "loss": 0.9179, "step": 898 }, { "epoch": 0.12666431842197956, "grad_norm": 1.1106724054933104, "learning_rate": 1.9513944020897502e-05, "loss": 0.9495, "step": 899 }, { "epoch": 0.12680521310320536, "grad_norm": 1.0065802571408389, "learning_rate": 1.9512537550188192e-05, "loss": 0.9607, "step": 900 }, { "epoch": 0.12694610778443113, "grad_norm": 1.2670561805933112, "learning_rate": 1.9511129098342385e-05, "loss": 0.975, "step": 901 }, { "epoch": 0.12708700246565693, "grad_norm": 1.5067139959482745, "learning_rate": 1.9509718665653418e-05, "loss": 0.6472, "step": 902 }, { "epoch": 0.1272278971468827, "grad_norm": 1.230695554999981, "learning_rate": 1.9508306252415026e-05, "loss": 0.6185, "step": 903 }, { "epoch": 0.12736879182810848, "grad_norm": 1.3635299575559263, "learning_rate": 1.9506891858921376e-05, "loss": 0.5703, "step": 904 }, { "epoch": 0.12750968650933428, "grad_norm": 1.1219663494643715, "learning_rate": 1.950547548546703e-05, "loss": 0.9635, "step": 905 }, { "epoch": 0.12765058119056005, "grad_norm": 1.187139990772807, "learning_rate": 1.9504057132346978e-05, "loss": 0.9529, "step": 906 }, { "epoch": 0.12779147587178585, "grad_norm": 1.0500151538650102, "learning_rate": 1.9502636799856607e-05, "loss": 0.9122, "step": 907 }, { "epoch": 0.12793237055301163, "grad_norm": 1.0333556680765008, "learning_rate": 1.9501214488291733e-05, "loss": 0.9581, "step": 908 }, { "epoch": 0.1280732652342374, "grad_norm": 1.0477949087199514, "learning_rate": 1.9499790197948566e-05, "loss": 0.9831, "step": 909 }, { "epoch": 0.1282141599154632, "grad_norm": 1.273504412512679, "learning_rate": 1.9498363929123737e-05, "loss": 0.4538, "step": 910 }, { "epoch": 0.12835505459668897, "grad_norm": 1.3854940771304303, "learning_rate": 1.9496935682114296e-05, "loss": 0.4851, "step": 911 }, { "epoch": 0.12849594927791474, "grad_norm": 1.476508978424606, "learning_rate": 1.9495505457217692e-05, "loss": 0.6401, "step": 912 }, { "epoch": 0.12863684395914055, "grad_norm": 1.170788409513499, "learning_rate": 1.9494073254731796e-05, "loss": 0.9164, "step": 913 }, { "epoch": 0.12877773864036632, "grad_norm": 1.041189510186975, "learning_rate": 1.9492639074954884e-05, "loss": 0.8506, "step": 914 }, { "epoch": 0.12891863332159212, "grad_norm": 1.1307740029387612, "learning_rate": 1.949120291818565e-05, "loss": 0.9073, "step": 915 }, { "epoch": 0.1290595280028179, "grad_norm": 1.2195781073312653, "learning_rate": 1.948976478472319e-05, "loss": 0.9738, "step": 916 }, { "epoch": 0.12920042268404366, "grad_norm": 1.3244887988519973, "learning_rate": 1.9488324674867026e-05, "loss": 0.4438, "step": 917 }, { "epoch": 0.12934131736526946, "grad_norm": 1.0619962356476926, "learning_rate": 1.9486882588917077e-05, "loss": 0.9322, "step": 918 }, { "epoch": 0.12948221204649524, "grad_norm": 1.2060947536700979, "learning_rate": 1.9485438527173684e-05, "loss": 0.9423, "step": 919 }, { "epoch": 0.12962310672772104, "grad_norm": 1.2135588065077905, "learning_rate": 1.9483992489937592e-05, "loss": 0.9716, "step": 920 }, { "epoch": 0.1297640014089468, "grad_norm": 1.1719880616349205, "learning_rate": 1.9482544477509968e-05, "loss": 0.9812, "step": 921 }, { "epoch": 0.12990489609017258, "grad_norm": 1.107217231044824, "learning_rate": 1.948109449019238e-05, "loss": 0.9439, "step": 922 }, { "epoch": 0.13004579077139838, "grad_norm": 1.0142625627855901, "learning_rate": 1.9479642528286806e-05, "loss": 0.98, "step": 923 }, { "epoch": 0.13018668545262416, "grad_norm": 1.1416271700397227, "learning_rate": 1.9478188592095647e-05, "loss": 0.9279, "step": 924 }, { "epoch": 0.13032758013384996, "grad_norm": 0.9425912344850674, "learning_rate": 1.9476732681921703e-05, "loss": 0.9587, "step": 925 }, { "epoch": 0.13046847481507573, "grad_norm": 1.039579888103174, "learning_rate": 1.9475274798068197e-05, "loss": 0.9277, "step": 926 }, { "epoch": 0.1306093694963015, "grad_norm": 1.0537665494247672, "learning_rate": 1.9473814940838747e-05, "loss": 0.9486, "step": 927 }, { "epoch": 0.1307502641775273, "grad_norm": 1.1488913866069128, "learning_rate": 1.9472353110537398e-05, "loss": 0.9922, "step": 928 }, { "epoch": 0.13089115885875308, "grad_norm": 1.1450991364975918, "learning_rate": 1.9470889307468603e-05, "loss": 0.9609, "step": 929 }, { "epoch": 0.13103205353997888, "grad_norm": 1.312124128790232, "learning_rate": 1.946942353193721e-05, "loss": 0.5816, "step": 930 }, { "epoch": 0.13117294822120465, "grad_norm": 1.2309231077575196, "learning_rate": 1.94679557842485e-05, "loss": 0.4948, "step": 931 }, { "epoch": 0.13131384290243042, "grad_norm": 1.0679584481908395, "learning_rate": 1.9466486064708153e-05, "loss": 0.9557, "step": 932 }, { "epoch": 0.13145473758365622, "grad_norm": 1.1179913669320578, "learning_rate": 1.9465014373622258e-05, "loss": 0.9764, "step": 933 }, { "epoch": 0.131595632264882, "grad_norm": 1.1222270296457717, "learning_rate": 1.946354071129732e-05, "loss": 0.9968, "step": 934 }, { "epoch": 0.1317365269461078, "grad_norm": 1.2774724811045504, "learning_rate": 1.9462065078040255e-05, "loss": 0.9574, "step": 935 }, { "epoch": 0.13187742162733357, "grad_norm": 1.0431108223093548, "learning_rate": 1.946058747415838e-05, "loss": 0.9353, "step": 936 }, { "epoch": 0.13201831630855934, "grad_norm": 1.0257483092283253, "learning_rate": 1.9459107899959436e-05, "loss": 0.9477, "step": 937 }, { "epoch": 0.13215921098978514, "grad_norm": 1.3447564992355256, "learning_rate": 1.9457626355751564e-05, "loss": 0.5127, "step": 938 }, { "epoch": 0.13230010567101091, "grad_norm": 1.1472264028852508, "learning_rate": 1.9456142841843322e-05, "loss": 0.9169, "step": 939 }, { "epoch": 0.13244100035223672, "grad_norm": 1.0245751022107425, "learning_rate": 1.9454657358543674e-05, "loss": 0.9425, "step": 940 }, { "epoch": 0.1325818950334625, "grad_norm": 1.144796722830105, "learning_rate": 1.9453169906161994e-05, "loss": 0.9863, "step": 941 }, { "epoch": 0.13272278971468826, "grad_norm": 1.055566020971941, "learning_rate": 1.9451680485008075e-05, "loss": 1.0154, "step": 942 }, { "epoch": 0.13286368439591406, "grad_norm": 1.0312187514860045, "learning_rate": 1.9450189095392097e-05, "loss": 0.9285, "step": 943 }, { "epoch": 0.13300457907713983, "grad_norm": 1.4435899547477058, "learning_rate": 1.944869573762468e-05, "loss": 0.5472, "step": 944 }, { "epoch": 0.13314547375836563, "grad_norm": 1.0317806521232333, "learning_rate": 1.9447200412016835e-05, "loss": 0.9469, "step": 945 }, { "epoch": 0.1332863684395914, "grad_norm": 1.3905566000605194, "learning_rate": 1.9445703118879984e-05, "loss": 0.63, "step": 946 }, { "epoch": 0.13342726312081718, "grad_norm": 1.05770089204736, "learning_rate": 1.9444203858525962e-05, "loss": 0.9817, "step": 947 }, { "epoch": 0.13356815780204298, "grad_norm": 1.122240268298328, "learning_rate": 1.944270263126702e-05, "loss": 1.0338, "step": 948 }, { "epoch": 0.13370905248326875, "grad_norm": 1.0507814610073216, "learning_rate": 1.9441199437415807e-05, "loss": 0.8684, "step": 949 }, { "epoch": 0.13384994716449455, "grad_norm": 1.130405260160385, "learning_rate": 1.9439694277285386e-05, "loss": 0.952, "step": 950 }, { "epoch": 0.13399084184572033, "grad_norm": 1.1125288353221636, "learning_rate": 1.943818715118924e-05, "loss": 1.0072, "step": 951 }, { "epoch": 0.1341317365269461, "grad_norm": 1.3120513290445721, "learning_rate": 1.9436678059441238e-05, "loss": 0.5749, "step": 952 }, { "epoch": 0.1342726312081719, "grad_norm": 1.3495161980467514, "learning_rate": 1.9435167002355683e-05, "loss": 0.5276, "step": 953 }, { "epoch": 0.13441352588939767, "grad_norm": 1.1290492988028695, "learning_rate": 1.9433653980247267e-05, "loss": 0.9277, "step": 954 }, { "epoch": 0.13455442057062345, "grad_norm": 1.1122550876476214, "learning_rate": 1.943213899343111e-05, "loss": 0.9275, "step": 955 }, { "epoch": 0.13469531525184925, "grad_norm": 1.0467655103950362, "learning_rate": 1.943062204222273e-05, "loss": 0.9846, "step": 956 }, { "epoch": 0.13483620993307502, "grad_norm": 1.0226596447587089, "learning_rate": 1.942910312693805e-05, "loss": 0.9646, "step": 957 }, { "epoch": 0.13497710461430082, "grad_norm": 1.5401528535985296, "learning_rate": 1.9427582247893412e-05, "loss": 0.5506, "step": 958 }, { "epoch": 0.1351179992955266, "grad_norm": 1.1403389089258924, "learning_rate": 1.9426059405405566e-05, "loss": 0.9126, "step": 959 }, { "epoch": 0.13525889397675236, "grad_norm": 1.1223980723346247, "learning_rate": 1.9424534599791663e-05, "loss": 0.9934, "step": 960 }, { "epoch": 0.13539978865797817, "grad_norm": 1.1308812149284817, "learning_rate": 1.9423007831369275e-05, "loss": 1.0347, "step": 961 }, { "epoch": 0.13554068333920394, "grad_norm": 1.208371200180514, "learning_rate": 1.9421479100456372e-05, "loss": 0.9812, "step": 962 }, { "epoch": 0.13568157802042974, "grad_norm": 1.313802794239537, "learning_rate": 1.941994840737133e-05, "loss": 0.996, "step": 963 }, { "epoch": 0.1358224727016555, "grad_norm": 1.1206026537146843, "learning_rate": 1.9418415752432948e-05, "loss": 0.9538, "step": 964 }, { "epoch": 0.13596336738288128, "grad_norm": 1.420633179682969, "learning_rate": 1.9416881135960425e-05, "loss": 0.5125, "step": 965 }, { "epoch": 0.13610426206410708, "grad_norm": 1.1564374402883222, "learning_rate": 1.9415344558273365e-05, "loss": 0.9963, "step": 966 }, { "epoch": 0.13624515674533286, "grad_norm": 1.528178810255486, "learning_rate": 1.9413806019691788e-05, "loss": 0.5082, "step": 967 }, { "epoch": 0.13638605142655866, "grad_norm": 1.1362001690737928, "learning_rate": 1.9412265520536117e-05, "loss": 0.8845, "step": 968 }, { "epoch": 0.13652694610778443, "grad_norm": 1.0201848865213112, "learning_rate": 1.9410723061127186e-05, "loss": 0.9314, "step": 969 }, { "epoch": 0.1366678407890102, "grad_norm": 1.0593899672822653, "learning_rate": 1.9409178641786243e-05, "loss": 0.9201, "step": 970 }, { "epoch": 0.136808735470236, "grad_norm": 1.0365227409443403, "learning_rate": 1.9407632262834924e-05, "loss": 0.9456, "step": 971 }, { "epoch": 0.13694963015146178, "grad_norm": 1.1220978057422795, "learning_rate": 1.9406083924595297e-05, "loss": 0.9275, "step": 972 }, { "epoch": 0.13709052483268758, "grad_norm": 1.1327232383232215, "learning_rate": 1.9404533627389827e-05, "loss": 0.93, "step": 973 }, { "epoch": 0.13723141951391335, "grad_norm": 1.0932287543847325, "learning_rate": 1.940298137154139e-05, "loss": 0.9041, "step": 974 }, { "epoch": 0.13737231419513912, "grad_norm": 1.4818908393992052, "learning_rate": 1.9401427157373257e-05, "loss": 0.4749, "step": 975 }, { "epoch": 0.13751320887636492, "grad_norm": 1.3472848055933075, "learning_rate": 1.939987098520913e-05, "loss": 0.564, "step": 976 }, { "epoch": 0.1376541035575907, "grad_norm": 1.0430181307705866, "learning_rate": 1.9398312855373098e-05, "loss": 0.9316, "step": 977 }, { "epoch": 0.1377949982388165, "grad_norm": 1.0812394696671974, "learning_rate": 1.9396752768189673e-05, "loss": 0.9253, "step": 978 }, { "epoch": 0.13793589292004227, "grad_norm": 1.0795171067933742, "learning_rate": 1.9395190723983762e-05, "loss": 0.9582, "step": 979 }, { "epoch": 0.13807678760126804, "grad_norm": 1.2598472753820333, "learning_rate": 1.9393626723080688e-05, "loss": 1.062, "step": 980 }, { "epoch": 0.13821768228249384, "grad_norm": 1.3097436548230614, "learning_rate": 1.9392060765806185e-05, "loss": 0.6286, "step": 981 }, { "epoch": 0.13835857696371962, "grad_norm": 1.3125914846425808, "learning_rate": 1.9390492852486374e-05, "loss": 0.6556, "step": 982 }, { "epoch": 0.13849947164494542, "grad_norm": 1.0854596081532542, "learning_rate": 1.9388922983447814e-05, "loss": 0.9142, "step": 983 }, { "epoch": 0.1386403663261712, "grad_norm": 1.0396676115604655, "learning_rate": 1.9387351159017442e-05, "loss": 0.8573, "step": 984 }, { "epoch": 0.13878126100739696, "grad_norm": 1.039387156822931, "learning_rate": 1.938577737952262e-05, "loss": 1.0145, "step": 985 }, { "epoch": 0.13892215568862276, "grad_norm": 1.253747374706561, "learning_rate": 1.9384201645291115e-05, "loss": 0.9253, "step": 986 }, { "epoch": 0.13906305036984853, "grad_norm": 1.3743362047553986, "learning_rate": 1.9382623956651095e-05, "loss": 0.4922, "step": 987 }, { "epoch": 0.13920394505107433, "grad_norm": 1.276189333476114, "learning_rate": 1.9381044313931142e-05, "loss": 0.9551, "step": 988 }, { "epoch": 0.1393448397323001, "grad_norm": 1.0204467308885012, "learning_rate": 1.9379462717460243e-05, "loss": 0.918, "step": 989 }, { "epoch": 0.13948573441352588, "grad_norm": 1.0041085626413677, "learning_rate": 1.9377879167567785e-05, "loss": 0.9488, "step": 990 }, { "epoch": 0.13962662909475168, "grad_norm": 1.1977506763827495, "learning_rate": 1.9376293664583565e-05, "loss": 0.8818, "step": 991 }, { "epoch": 0.13976752377597745, "grad_norm": 1.2242146995313554, "learning_rate": 1.93747062088378e-05, "loss": 0.5059, "step": 992 }, { "epoch": 0.13990841845720325, "grad_norm": 1.2203015659133676, "learning_rate": 1.937311680066109e-05, "loss": 0.499, "step": 993 }, { "epoch": 0.14004931313842903, "grad_norm": 1.1437033093192517, "learning_rate": 1.9371525440384468e-05, "loss": 0.9594, "step": 994 }, { "epoch": 0.1401902078196548, "grad_norm": 1.482545300714916, "learning_rate": 1.936993212833935e-05, "loss": 0.6163, "step": 995 }, { "epoch": 0.1403311025008806, "grad_norm": 1.0726192895041884, "learning_rate": 1.9368336864857567e-05, "loss": 0.935, "step": 996 }, { "epoch": 0.14047199718210637, "grad_norm": 1.1921998952230946, "learning_rate": 1.936673965027137e-05, "loss": 0.9393, "step": 997 }, { "epoch": 0.14061289186333215, "grad_norm": 1.1267028425327124, "learning_rate": 1.936514048491339e-05, "loss": 0.9512, "step": 998 }, { "epoch": 0.14075378654455795, "grad_norm": 1.308872818379349, "learning_rate": 1.936353936911669e-05, "loss": 0.5154, "step": 999 }, { "epoch": 0.14089468122578372, "grad_norm": 1.2873783731590285, "learning_rate": 1.936193630321472e-05, "loss": 0.9379, "step": 1000 }, { "epoch": 0.14103557590700952, "grad_norm": 1.4288708151474874, "learning_rate": 1.9360331287541346e-05, "loss": 0.4994, "step": 1001 }, { "epoch": 0.1411764705882353, "grad_norm": 1.1436524352243134, "learning_rate": 1.935872432243084e-05, "loss": 0.9835, "step": 1002 }, { "epoch": 0.14131736526946106, "grad_norm": 1.3088729356112003, "learning_rate": 1.9357115408217875e-05, "loss": 0.5229, "step": 1003 }, { "epoch": 0.14145825995068687, "grad_norm": 1.08320615808755, "learning_rate": 1.9355504545237533e-05, "loss": 0.9061, "step": 1004 }, { "epoch": 0.14159915463191264, "grad_norm": 1.2816434607044715, "learning_rate": 1.9353891733825306e-05, "loss": 0.5553, "step": 1005 }, { "epoch": 0.14174004931313844, "grad_norm": 1.0675429338015479, "learning_rate": 1.935227697431708e-05, "loss": 0.9414, "step": 1006 }, { "epoch": 0.1418809439943642, "grad_norm": 1.3715635438042493, "learning_rate": 1.935066026704916e-05, "loss": 0.6757, "step": 1007 }, { "epoch": 0.14202183867558998, "grad_norm": 1.1239930289158475, "learning_rate": 1.9349041612358254e-05, "loss": 0.9942, "step": 1008 }, { "epoch": 0.14216273335681578, "grad_norm": 1.4798101362061786, "learning_rate": 1.9347421010581465e-05, "loss": 0.5951, "step": 1009 }, { "epoch": 0.14230362803804156, "grad_norm": 1.2668825704714861, "learning_rate": 1.9345798462056308e-05, "loss": 0.5672, "step": 1010 }, { "epoch": 0.14244452271926736, "grad_norm": 1.0427264708436244, "learning_rate": 1.9344173967120714e-05, "loss": 0.9757, "step": 1011 }, { "epoch": 0.14258541740049313, "grad_norm": 1.0826332095918463, "learning_rate": 1.9342547526113002e-05, "loss": 0.9552, "step": 1012 }, { "epoch": 0.1427263120817189, "grad_norm": 1.0100597529094888, "learning_rate": 1.9340919139371904e-05, "loss": 0.9386, "step": 1013 }, { "epoch": 0.1428672067629447, "grad_norm": 1.5264043904705045, "learning_rate": 1.933928880723656e-05, "loss": 0.5387, "step": 1014 }, { "epoch": 0.14300810144417048, "grad_norm": 1.0687537931505295, "learning_rate": 1.933765653004652e-05, "loss": 0.9086, "step": 1015 }, { "epoch": 0.14314899612539628, "grad_norm": 0.9951232185535719, "learning_rate": 1.9336022308141715e-05, "loss": 0.9511, "step": 1016 }, { "epoch": 0.14328989080662205, "grad_norm": 1.3873912401231758, "learning_rate": 1.9334386141862505e-05, "loss": 0.658, "step": 1017 }, { "epoch": 0.14343078548784782, "grad_norm": 1.0748198559297029, "learning_rate": 1.9332748031549656e-05, "loss": 0.9481, "step": 1018 }, { "epoch": 0.14357168016907362, "grad_norm": 1.122950764681062, "learning_rate": 1.9331107977544317e-05, "loss": 0.9312, "step": 1019 }, { "epoch": 0.1437125748502994, "grad_norm": 1.1741509462150665, "learning_rate": 1.9329465980188064e-05, "loss": 0.9531, "step": 1020 }, { "epoch": 0.1438534695315252, "grad_norm": 1.3641198256205636, "learning_rate": 1.9327822039822863e-05, "loss": 0.4829, "step": 1021 }, { "epoch": 0.14399436421275097, "grad_norm": 1.0883935992401392, "learning_rate": 1.9326176156791097e-05, "loss": 0.9995, "step": 1022 }, { "epoch": 0.14413525889397674, "grad_norm": 1.087740657753808, "learning_rate": 1.9324528331435543e-05, "loss": 0.8945, "step": 1023 }, { "epoch": 0.14427615357520254, "grad_norm": 1.3034862696058143, "learning_rate": 1.9322878564099386e-05, "loss": 0.5537, "step": 1024 }, { "epoch": 0.14441704825642832, "grad_norm": 1.535482561289593, "learning_rate": 1.932122685512622e-05, "loss": 0.5084, "step": 1025 }, { "epoch": 0.14455794293765412, "grad_norm": 1.4305888380451037, "learning_rate": 1.9319573204860038e-05, "loss": 0.5805, "step": 1026 }, { "epoch": 0.1446988376188799, "grad_norm": 1.1704521047683185, "learning_rate": 1.9317917613645236e-05, "loss": 0.948, "step": 1027 }, { "epoch": 0.14483973230010566, "grad_norm": 1.1453781640279785, "learning_rate": 1.931626008182662e-05, "loss": 0.471, "step": 1028 }, { "epoch": 0.14498062698133146, "grad_norm": 1.0257173345169213, "learning_rate": 1.9314600609749396e-05, "loss": 0.9477, "step": 1029 }, { "epoch": 0.14512152166255723, "grad_norm": 1.1388766010075508, "learning_rate": 1.9312939197759177e-05, "loss": 0.9736, "step": 1030 }, { "epoch": 0.14526241634378304, "grad_norm": 0.9996208184807663, "learning_rate": 1.931127584620198e-05, "loss": 0.9576, "step": 1031 }, { "epoch": 0.1454033110250088, "grad_norm": 0.9927150753295733, "learning_rate": 1.9309610555424213e-05, "loss": 0.9141, "step": 1032 }, { "epoch": 0.14554420570623458, "grad_norm": 0.9602373285725526, "learning_rate": 1.9307943325772713e-05, "loss": 0.956, "step": 1033 }, { "epoch": 0.14568510038746038, "grad_norm": 1.4406587500338592, "learning_rate": 1.9306274157594702e-05, "loss": 0.5656, "step": 1034 }, { "epoch": 0.14582599506868615, "grad_norm": 1.4848091087829467, "learning_rate": 1.930460305123781e-05, "loss": 0.6969, "step": 1035 }, { "epoch": 0.14596688974991195, "grad_norm": 1.0620108557449082, "learning_rate": 1.9302930007050072e-05, "loss": 0.9233, "step": 1036 }, { "epoch": 0.14610778443113773, "grad_norm": 0.9301930439519143, "learning_rate": 1.930125502537992e-05, "loss": 0.9803, "step": 1037 }, { "epoch": 0.1462486791123635, "grad_norm": 1.0791936512076312, "learning_rate": 1.9299578106576207e-05, "loss": 0.9722, "step": 1038 }, { "epoch": 0.1463895737935893, "grad_norm": 1.2089037005501229, "learning_rate": 1.9297899250988172e-05, "loss": 0.8986, "step": 1039 }, { "epoch": 0.14653046847481507, "grad_norm": 1.3744223090640841, "learning_rate": 1.9296218458965462e-05, "loss": 0.5391, "step": 1040 }, { "epoch": 0.14667136315604085, "grad_norm": 1.0497312651269204, "learning_rate": 1.929453573085813e-05, "loss": 0.9257, "step": 1041 }, { "epoch": 0.14681225783726665, "grad_norm": 1.1334957125121972, "learning_rate": 1.9292851067016626e-05, "loss": 0.9529, "step": 1042 }, { "epoch": 0.14695315251849242, "grad_norm": 1.3170423056558356, "learning_rate": 1.9291164467791815e-05, "loss": 0.9427, "step": 1043 }, { "epoch": 0.14709404719971822, "grad_norm": 0.9451268265992946, "learning_rate": 1.9289475933534958e-05, "loss": 0.978, "step": 1044 }, { "epoch": 0.147234941880944, "grad_norm": 1.0261822192921102, "learning_rate": 1.928778546459771e-05, "loss": 0.9202, "step": 1045 }, { "epoch": 0.14737583656216977, "grad_norm": 0.9936573955696295, "learning_rate": 1.928609306133215e-05, "loss": 0.9338, "step": 1046 }, { "epoch": 0.14751673124339557, "grad_norm": 1.3264862610317838, "learning_rate": 1.9284398724090742e-05, "loss": 0.6428, "step": 1047 }, { "epoch": 0.14765762592462134, "grad_norm": 1.2898070303286244, "learning_rate": 1.9282702453226357e-05, "loss": 0.4225, "step": 1048 }, { "epoch": 0.14779852060584714, "grad_norm": 1.198816546109782, "learning_rate": 1.9281004249092273e-05, "loss": 0.9398, "step": 1049 }, { "epoch": 0.1479394152870729, "grad_norm": 0.9944435146196269, "learning_rate": 1.9279304112042165e-05, "loss": 0.8326, "step": 1050 }, { "epoch": 0.14808030996829868, "grad_norm": 1.0047320683309722, "learning_rate": 1.9277602042430115e-05, "loss": 0.957, "step": 1051 }, { "epoch": 0.14822120464952449, "grad_norm": 1.3313315510044768, "learning_rate": 1.927589804061061e-05, "loss": 0.5357, "step": 1052 }, { "epoch": 0.14836209933075026, "grad_norm": 1.1614417606742773, "learning_rate": 1.9274192106938524e-05, "loss": 0.9278, "step": 1053 }, { "epoch": 0.14850299401197606, "grad_norm": 1.1398856323005317, "learning_rate": 1.9272484241769155e-05, "loss": 0.9825, "step": 1054 }, { "epoch": 0.14864388869320183, "grad_norm": 1.035278160672675, "learning_rate": 1.9270774445458192e-05, "loss": 0.9764, "step": 1055 }, { "epoch": 0.1487847833744276, "grad_norm": 1.0228247442918112, "learning_rate": 1.926906271836172e-05, "loss": 0.943, "step": 1056 }, { "epoch": 0.1489256780556534, "grad_norm": 1.33619172210866, "learning_rate": 1.9267349060836246e-05, "loss": 0.4875, "step": 1057 }, { "epoch": 0.14906657273687918, "grad_norm": 1.1146462587836945, "learning_rate": 1.9265633473238657e-05, "loss": 0.9707, "step": 1058 }, { "epoch": 0.14920746741810498, "grad_norm": 1.3821851688659623, "learning_rate": 1.9263915955926247e-05, "loss": 0.5774, "step": 1059 }, { "epoch": 0.14934836209933075, "grad_norm": 1.194393942136231, "learning_rate": 1.926219650925673e-05, "loss": 0.4858, "step": 1060 }, { "epoch": 0.14948925678055652, "grad_norm": 1.1383107421909733, "learning_rate": 1.9260475133588193e-05, "loss": 0.9328, "step": 1061 }, { "epoch": 0.14963015146178232, "grad_norm": 1.0738620534450458, "learning_rate": 1.9258751829279152e-05, "loss": 0.945, "step": 1062 }, { "epoch": 0.1497710461430081, "grad_norm": 1.318241997945817, "learning_rate": 1.9257026596688507e-05, "loss": 0.9321, "step": 1063 }, { "epoch": 0.1499119408242339, "grad_norm": 1.337450953760277, "learning_rate": 1.925529943617556e-05, "loss": 0.6204, "step": 1064 }, { "epoch": 0.15005283550545967, "grad_norm": 1.026461605357864, "learning_rate": 1.925357034810003e-05, "loss": 0.941, "step": 1065 }, { "epoch": 0.15019373018668544, "grad_norm": 0.9816059376010277, "learning_rate": 1.925183933282202e-05, "loss": 0.8784, "step": 1066 }, { "epoch": 0.15033462486791124, "grad_norm": 1.1923907846380382, "learning_rate": 1.9250106390702045e-05, "loss": 0.9213, "step": 1067 }, { "epoch": 0.15047551954913702, "grad_norm": 1.2199991753754644, "learning_rate": 1.9248371522101013e-05, "loss": 0.9408, "step": 1068 }, { "epoch": 0.15061641423036282, "grad_norm": 1.1862873111602985, "learning_rate": 1.924663472738024e-05, "loss": 0.957, "step": 1069 }, { "epoch": 0.1507573089115886, "grad_norm": 0.9779167180146687, "learning_rate": 1.9244896006901443e-05, "loss": 0.9484, "step": 1070 }, { "epoch": 0.15089820359281436, "grad_norm": 1.4717183078486469, "learning_rate": 1.9243155361026736e-05, "loss": 0.9495, "step": 1071 }, { "epoch": 0.15103909827404016, "grad_norm": 1.172273209439218, "learning_rate": 1.9241412790118636e-05, "loss": 0.935, "step": 1072 }, { "epoch": 0.15117999295526593, "grad_norm": 1.0579461564658994, "learning_rate": 1.923966829454006e-05, "loss": 0.9588, "step": 1073 }, { "epoch": 0.15132088763649174, "grad_norm": 1.0435568292509732, "learning_rate": 1.9237921874654334e-05, "loss": 0.9499, "step": 1074 }, { "epoch": 0.1514617823177175, "grad_norm": 1.3382883253599358, "learning_rate": 1.9236173530825167e-05, "loss": 0.5253, "step": 1075 }, { "epoch": 0.15160267699894328, "grad_norm": 1.1758890190223226, "learning_rate": 1.9234423263416685e-05, "loss": 0.9271, "step": 1076 }, { "epoch": 0.15174357168016908, "grad_norm": 1.3244278554093165, "learning_rate": 1.9232671072793412e-05, "loss": 0.9876, "step": 1077 }, { "epoch": 0.15188446636139485, "grad_norm": 1.0355367862755058, "learning_rate": 1.9230916959320262e-05, "loss": 0.952, "step": 1078 }, { "epoch": 0.15202536104262065, "grad_norm": 1.0477167413677155, "learning_rate": 1.922916092336256e-05, "loss": 0.951, "step": 1079 }, { "epoch": 0.15216625572384643, "grad_norm": 1.1260432224086148, "learning_rate": 1.922740296528603e-05, "loss": 0.9238, "step": 1080 }, { "epoch": 0.1523071504050722, "grad_norm": 1.2217494810864313, "learning_rate": 1.9225643085456794e-05, "loss": 0.8617, "step": 1081 }, { "epoch": 0.152448045086298, "grad_norm": 1.0970245847701885, "learning_rate": 1.9223881284241372e-05, "loss": 0.9186, "step": 1082 }, { "epoch": 0.15258893976752377, "grad_norm": 1.0793825746489853, "learning_rate": 1.9222117562006693e-05, "loss": 0.8996, "step": 1083 }, { "epoch": 0.15272983444874955, "grad_norm": 1.0563568696553414, "learning_rate": 1.922035191912007e-05, "loss": 0.9571, "step": 1084 }, { "epoch": 0.15287072912997535, "grad_norm": 1.102347359607072, "learning_rate": 1.9218584355949237e-05, "loss": 0.9742, "step": 1085 }, { "epoch": 0.15301162381120112, "grad_norm": 1.2824995323607402, "learning_rate": 1.921681487286231e-05, "loss": 0.499, "step": 1086 }, { "epoch": 0.15315251849242692, "grad_norm": 1.028759335303356, "learning_rate": 1.921504347022782e-05, "loss": 0.9325, "step": 1087 }, { "epoch": 0.1532934131736527, "grad_norm": 1.0952956952251638, "learning_rate": 1.9213270148414676e-05, "loss": 0.9191, "step": 1088 }, { "epoch": 0.15343430785487847, "grad_norm": 1.2249903130668292, "learning_rate": 1.921149490779221e-05, "loss": 0.9325, "step": 1089 }, { "epoch": 0.15357520253610427, "grad_norm": 0.9225532348994375, "learning_rate": 1.9209717748730145e-05, "loss": 0.9602, "step": 1090 }, { "epoch": 0.15371609721733004, "grad_norm": 1.2145495851829857, "learning_rate": 1.9207938671598597e-05, "loss": 0.567, "step": 1091 }, { "epoch": 0.15385699189855584, "grad_norm": 1.33007226611119, "learning_rate": 1.920615767676809e-05, "loss": 0.5686, "step": 1092 }, { "epoch": 0.1539978865797816, "grad_norm": 1.0275953128492004, "learning_rate": 1.9204374764609544e-05, "loss": 0.95, "step": 1093 }, { "epoch": 0.15413878126100738, "grad_norm": 1.3731171043222936, "learning_rate": 1.920258993549428e-05, "loss": 0.5824, "step": 1094 }, { "epoch": 0.15427967594223319, "grad_norm": 1.3916449367895705, "learning_rate": 1.9200803189794014e-05, "loss": 0.6621, "step": 1095 }, { "epoch": 0.15442057062345896, "grad_norm": 1.0023767870884053, "learning_rate": 1.9199014527880865e-05, "loss": 0.9699, "step": 1096 }, { "epoch": 0.15456146530468476, "grad_norm": 1.0506971320947827, "learning_rate": 1.9197223950127353e-05, "loss": 1.0003, "step": 1097 }, { "epoch": 0.15470235998591053, "grad_norm": 1.0232535117921244, "learning_rate": 1.9195431456906388e-05, "loss": 0.9053, "step": 1098 }, { "epoch": 0.1548432546671363, "grad_norm": 1.292157980352121, "learning_rate": 1.9193637048591295e-05, "loss": 0.5196, "step": 1099 }, { "epoch": 0.1549841493483621, "grad_norm": 1.4484654895170284, "learning_rate": 1.9191840725555776e-05, "loss": 0.6144, "step": 1100 }, { "epoch": 0.15512504402958788, "grad_norm": 1.0865292637086492, "learning_rate": 1.9190042488173954e-05, "loss": 0.9743, "step": 1101 }, { "epoch": 0.15526593871081368, "grad_norm": 1.2856917299410584, "learning_rate": 1.9188242336820333e-05, "loss": 0.5265, "step": 1102 }, { "epoch": 0.15540683339203945, "grad_norm": 1.01248133835151, "learning_rate": 1.9186440271869826e-05, "loss": 0.9349, "step": 1103 }, { "epoch": 0.15554772807326522, "grad_norm": 1.2683681460306608, "learning_rate": 1.9184636293697742e-05, "loss": 0.4794, "step": 1104 }, { "epoch": 0.15568862275449102, "grad_norm": 1.0085710344025653, "learning_rate": 1.9182830402679786e-05, "loss": 0.9225, "step": 1105 }, { "epoch": 0.1558295174357168, "grad_norm": 1.1923165458565381, "learning_rate": 1.918102259919207e-05, "loss": 0.9449, "step": 1106 }, { "epoch": 0.1559704121169426, "grad_norm": 1.1186436594435942, "learning_rate": 1.9179212883611086e-05, "loss": 0.8962, "step": 1107 }, { "epoch": 0.15611130679816837, "grad_norm": 1.0619252569641766, "learning_rate": 1.917740125631375e-05, "loss": 0.9099, "step": 1108 }, { "epoch": 0.15625220147939414, "grad_norm": 1.358721950147128, "learning_rate": 1.917558771767735e-05, "loss": 0.6582, "step": 1109 }, { "epoch": 0.15639309616061994, "grad_norm": 1.3651957486791497, "learning_rate": 1.9173772268079588e-05, "loss": 0.6029, "step": 1110 }, { "epoch": 0.15653399084184572, "grad_norm": 1.0444902311870776, "learning_rate": 1.917195490789856e-05, "loss": 0.9032, "step": 1111 }, { "epoch": 0.15667488552307152, "grad_norm": 0.9985209176850915, "learning_rate": 1.9170135637512765e-05, "loss": 0.9072, "step": 1112 }, { "epoch": 0.1568157802042973, "grad_norm": 1.121752554757842, "learning_rate": 1.9168314457301087e-05, "loss": 0.8986, "step": 1113 }, { "epoch": 0.15695667488552306, "grad_norm": 1.4510967613038597, "learning_rate": 1.9166491367642822e-05, "loss": 0.5001, "step": 1114 }, { "epoch": 0.15709756956674886, "grad_norm": 1.1233567629259114, "learning_rate": 1.916466636891765e-05, "loss": 0.9188, "step": 1115 }, { "epoch": 0.15723846424797464, "grad_norm": 1.1107173527363665, "learning_rate": 1.9162839461505663e-05, "loss": 0.9608, "step": 1116 }, { "epoch": 0.15737935892920044, "grad_norm": 1.0597512844975296, "learning_rate": 1.9161010645787344e-05, "loss": 0.9933, "step": 1117 }, { "epoch": 0.1575202536104262, "grad_norm": 1.0282896191446125, "learning_rate": 1.9159179922143567e-05, "loss": 0.9016, "step": 1118 }, { "epoch": 0.15766114829165198, "grad_norm": 1.0573741764236289, "learning_rate": 1.915734729095561e-05, "loss": 0.9442, "step": 1119 }, { "epoch": 0.15780204297287778, "grad_norm": 1.0314216877045868, "learning_rate": 1.915551275260515e-05, "loss": 0.9171, "step": 1120 }, { "epoch": 0.15794293765410355, "grad_norm": 1.0491761043608343, "learning_rate": 1.915367630747426e-05, "loss": 1.0365, "step": 1121 }, { "epoch": 0.15808383233532936, "grad_norm": 1.3716892796243811, "learning_rate": 1.9151837955945406e-05, "loss": 0.4805, "step": 1122 }, { "epoch": 0.15822472701655513, "grad_norm": 1.034341155972074, "learning_rate": 1.9149997698401453e-05, "loss": 0.9697, "step": 1123 }, { "epoch": 0.1583656216977809, "grad_norm": 1.0344217395071802, "learning_rate": 1.914815553522567e-05, "loss": 0.9602, "step": 1124 }, { "epoch": 0.1585065163790067, "grad_norm": 1.099772520174904, "learning_rate": 1.9146311466801707e-05, "loss": 0.9122, "step": 1125 }, { "epoch": 0.15864741106023247, "grad_norm": 1.1096639085656577, "learning_rate": 1.9144465493513627e-05, "loss": 0.9042, "step": 1126 }, { "epoch": 0.15878830574145825, "grad_norm": 1.0329682385484358, "learning_rate": 1.9142617615745883e-05, "loss": 0.9787, "step": 1127 }, { "epoch": 0.15892920042268405, "grad_norm": 1.5276911167917877, "learning_rate": 1.9140767833883318e-05, "loss": 0.5921, "step": 1128 }, { "epoch": 0.15907009510390982, "grad_norm": 1.2777964601482963, "learning_rate": 1.9138916148311188e-05, "loss": 0.5292, "step": 1129 }, { "epoch": 0.15921098978513562, "grad_norm": 1.0373832698506882, "learning_rate": 1.913706255941513e-05, "loss": 0.9833, "step": 1130 }, { "epoch": 0.1593518844663614, "grad_norm": 1.3471711268424653, "learning_rate": 1.9135207067581186e-05, "loss": 0.4998, "step": 1131 }, { "epoch": 0.15949277914758717, "grad_norm": 1.068370944792267, "learning_rate": 1.9133349673195785e-05, "loss": 0.9439, "step": 1132 }, { "epoch": 0.15963367382881297, "grad_norm": 0.9959104267341184, "learning_rate": 1.913149037664577e-05, "loss": 0.958, "step": 1133 }, { "epoch": 0.15977456851003874, "grad_norm": 1.0857233563136466, "learning_rate": 1.9129629178318362e-05, "loss": 0.9497, "step": 1134 }, { "epoch": 0.15991546319126454, "grad_norm": 1.372239990281672, "learning_rate": 1.9127766078601185e-05, "loss": 0.5775, "step": 1135 }, { "epoch": 0.1600563578724903, "grad_norm": 1.2740508233542358, "learning_rate": 1.912590107788226e-05, "loss": 0.9182, "step": 1136 }, { "epoch": 0.16019725255371609, "grad_norm": 1.2738624371882954, "learning_rate": 1.912403417655e-05, "loss": 0.9798, "step": 1137 }, { "epoch": 0.16033814723494189, "grad_norm": 1.269527701731984, "learning_rate": 1.912216537499322e-05, "loss": 0.5133, "step": 1138 }, { "epoch": 0.16047904191616766, "grad_norm": 1.1548812871815113, "learning_rate": 1.912029467360113e-05, "loss": 0.9332, "step": 1139 }, { "epoch": 0.16061993659739346, "grad_norm": 1.0160239862292446, "learning_rate": 1.911842207276333e-05, "loss": 0.9035, "step": 1140 }, { "epoch": 0.16076083127861923, "grad_norm": 1.2148656261898512, "learning_rate": 1.9116547572869816e-05, "loss": 0.9332, "step": 1141 }, { "epoch": 0.160901725959845, "grad_norm": 1.1465046646969843, "learning_rate": 1.9114671174310987e-05, "loss": 0.9399, "step": 1142 }, { "epoch": 0.1610426206410708, "grad_norm": 1.382094034097077, "learning_rate": 1.9112792877477635e-05, "loss": 0.5265, "step": 1143 }, { "epoch": 0.16118351532229658, "grad_norm": 1.125437150616197, "learning_rate": 1.9110912682760936e-05, "loss": 0.9047, "step": 1144 }, { "epoch": 0.16132441000352238, "grad_norm": 1.3499905133368506, "learning_rate": 1.9109030590552477e-05, "loss": 0.659, "step": 1145 }, { "epoch": 0.16146530468474815, "grad_norm": 0.9876231494752773, "learning_rate": 1.910714660124423e-05, "loss": 0.9917, "step": 1146 }, { "epoch": 0.16160619936597392, "grad_norm": 1.2296497820460117, "learning_rate": 1.910526071522857e-05, "loss": 0.6473, "step": 1147 }, { "epoch": 0.16174709404719972, "grad_norm": 1.0475119460827116, "learning_rate": 1.910337293289826e-05, "loss": 0.9184, "step": 1148 }, { "epoch": 0.1618879887284255, "grad_norm": 1.0555848455185768, "learning_rate": 1.910148325464646e-05, "loss": 0.9764, "step": 1149 }, { "epoch": 0.1620288834096513, "grad_norm": 1.194798616277332, "learning_rate": 1.9099591680866724e-05, "loss": 0.5324, "step": 1150 }, { "epoch": 0.16216977809087707, "grad_norm": 1.1822742696691277, "learning_rate": 1.9097698211953007e-05, "loss": 0.9105, "step": 1151 }, { "epoch": 0.16231067277210284, "grad_norm": 1.0704377658870619, "learning_rate": 1.9095802848299648e-05, "loss": 0.9921, "step": 1152 }, { "epoch": 0.16245156745332864, "grad_norm": 1.123877287584216, "learning_rate": 1.9093905590301388e-05, "loss": 0.9096, "step": 1153 }, { "epoch": 0.16259246213455442, "grad_norm": 0.9609566701365443, "learning_rate": 1.909200643835337e-05, "loss": 0.946, "step": 1154 }, { "epoch": 0.16273335681578022, "grad_norm": 1.0313649456459086, "learning_rate": 1.909010539285111e-05, "loss": 0.9921, "step": 1155 }, { "epoch": 0.162874251497006, "grad_norm": 1.4294345731946532, "learning_rate": 1.9088202454190535e-05, "loss": 0.5486, "step": 1156 }, { "epoch": 0.16301514617823176, "grad_norm": 0.9335555509025185, "learning_rate": 1.9086297622767963e-05, "loss": 0.8956, "step": 1157 }, { "epoch": 0.16315604085945756, "grad_norm": 1.0779460401565395, "learning_rate": 1.9084390898980105e-05, "loss": 0.9435, "step": 1158 }, { "epoch": 0.16329693554068334, "grad_norm": 0.9974930771156332, "learning_rate": 1.908248228322407e-05, "loss": 0.9317, "step": 1159 }, { "epoch": 0.16343783022190914, "grad_norm": 1.015273156356557, "learning_rate": 1.908057177589735e-05, "loss": 0.8628, "step": 1160 }, { "epoch": 0.1635787249031349, "grad_norm": 1.4251752697070859, "learning_rate": 1.9078659377397842e-05, "loss": 0.515, "step": 1161 }, { "epoch": 0.16371961958436068, "grad_norm": 1.7115756664344903, "learning_rate": 1.9076745088123837e-05, "loss": 0.6099, "step": 1162 }, { "epoch": 0.16386051426558648, "grad_norm": 1.1524788416468836, "learning_rate": 1.9074828908474012e-05, "loss": 0.9607, "step": 1163 }, { "epoch": 0.16400140894681225, "grad_norm": 1.0551705228715684, "learning_rate": 1.907291083884744e-05, "loss": 0.9457, "step": 1164 }, { "epoch": 0.16414230362803806, "grad_norm": 1.1171816317052425, "learning_rate": 1.9070990879643597e-05, "loss": 0.9018, "step": 1165 }, { "epoch": 0.16428319830926383, "grad_norm": 0.9961402587006316, "learning_rate": 1.906906903126233e-05, "loss": 0.8571, "step": 1166 }, { "epoch": 0.1644240929904896, "grad_norm": 1.004698398829442, "learning_rate": 1.9067145294103912e-05, "loss": 0.9292, "step": 1167 }, { "epoch": 0.1645649876717154, "grad_norm": 1.1213902419304824, "learning_rate": 1.9065219668568984e-05, "loss": 0.9453, "step": 1168 }, { "epoch": 0.16470588235294117, "grad_norm": 1.0687920189989524, "learning_rate": 1.9063292155058586e-05, "loss": 0.9603, "step": 1169 }, { "epoch": 0.16484677703416695, "grad_norm": 1.080648565224002, "learning_rate": 1.9061362753974153e-05, "loss": 0.9397, "step": 1170 }, { "epoch": 0.16498767171539275, "grad_norm": 1.1241348580895685, "learning_rate": 1.9059431465717516e-05, "loss": 0.9409, "step": 1171 }, { "epoch": 0.16512856639661852, "grad_norm": 1.591686014060754, "learning_rate": 1.9057498290690896e-05, "loss": 0.7407, "step": 1172 }, { "epoch": 0.16526946107784432, "grad_norm": 0.9656039422427164, "learning_rate": 1.9055563229296908e-05, "loss": 0.9057, "step": 1173 }, { "epoch": 0.1654103557590701, "grad_norm": 1.0841068550709494, "learning_rate": 1.9053626281938556e-05, "loss": 0.9207, "step": 1174 }, { "epoch": 0.16555125044029587, "grad_norm": 1.0791248827912245, "learning_rate": 1.9051687449019243e-05, "loss": 0.971, "step": 1175 }, { "epoch": 0.16569214512152167, "grad_norm": 1.3435648651181342, "learning_rate": 1.904974673094276e-05, "loss": 0.9523, "step": 1176 }, { "epoch": 0.16583303980274744, "grad_norm": 1.0120331904306212, "learning_rate": 1.9047804128113295e-05, "loss": 0.9999, "step": 1177 }, { "epoch": 0.16597393448397324, "grad_norm": 1.4415835913086816, "learning_rate": 1.904585964093542e-05, "loss": 0.5241, "step": 1178 }, { "epoch": 0.166114829165199, "grad_norm": 1.0768373211369084, "learning_rate": 1.904391326981411e-05, "loss": 0.9755, "step": 1179 }, { "epoch": 0.16625572384642479, "grad_norm": 1.0339136865617584, "learning_rate": 1.9041965015154726e-05, "loss": 0.9417, "step": 1180 }, { "epoch": 0.16639661852765059, "grad_norm": 0.9303362695162946, "learning_rate": 1.9040014877363024e-05, "loss": 0.9025, "step": 1181 }, { "epoch": 0.16653751320887636, "grad_norm": 1.0305010097017389, "learning_rate": 1.9038062856845152e-05, "loss": 1.0025, "step": 1182 }, { "epoch": 0.16667840789010216, "grad_norm": 1.0980861403779472, "learning_rate": 1.9036108954007646e-05, "loss": 0.9312, "step": 1183 }, { "epoch": 0.16681930257132793, "grad_norm": 1.0839124320717657, "learning_rate": 1.903415316925744e-05, "loss": 0.9803, "step": 1184 }, { "epoch": 0.1669601972525537, "grad_norm": 1.0471094801131553, "learning_rate": 1.9032195503001852e-05, "loss": 0.8988, "step": 1185 }, { "epoch": 0.1671010919337795, "grad_norm": 1.0055035597025286, "learning_rate": 1.9030235955648604e-05, "loss": 0.9012, "step": 1186 }, { "epoch": 0.16724198661500528, "grad_norm": 1.3874997747298687, "learning_rate": 1.90282745276058e-05, "loss": 0.533, "step": 1187 }, { "epoch": 0.16738288129623108, "grad_norm": 1.0316078385172125, "learning_rate": 1.9026311219281937e-05, "loss": 0.9055, "step": 1188 }, { "epoch": 0.16752377597745685, "grad_norm": 1.1819524970437256, "learning_rate": 1.902434603108591e-05, "loss": 0.474, "step": 1189 }, { "epoch": 0.16766467065868262, "grad_norm": 1.5237263310457043, "learning_rate": 1.902237896342699e-05, "loss": 0.5282, "step": 1190 }, { "epoch": 0.16780556533990842, "grad_norm": 1.0593390924574806, "learning_rate": 1.902041001671486e-05, "loss": 0.93, "step": 1191 }, { "epoch": 0.1679464600211342, "grad_norm": 1.3309164620416316, "learning_rate": 1.9018439191359577e-05, "loss": 0.9414, "step": 1192 }, { "epoch": 0.16808735470236, "grad_norm": 1.2834935028031136, "learning_rate": 1.9016466487771604e-05, "loss": 0.5234, "step": 1193 }, { "epoch": 0.16822824938358577, "grad_norm": 1.002303407129191, "learning_rate": 1.9014491906361785e-05, "loss": 0.9837, "step": 1194 }, { "epoch": 0.16836914406481154, "grad_norm": 0.9795969909550132, "learning_rate": 1.9012515447541356e-05, "loss": 0.9369, "step": 1195 }, { "epoch": 0.16851003874603734, "grad_norm": 1.4885921912084528, "learning_rate": 1.9010537111721946e-05, "loss": 0.5356, "step": 1196 }, { "epoch": 0.16865093342726312, "grad_norm": 1.2188820914919585, "learning_rate": 1.9008556899315574e-05, "loss": 0.5043, "step": 1197 }, { "epoch": 0.16879182810848892, "grad_norm": 1.12460735339341, "learning_rate": 1.9006574810734656e-05, "loss": 0.9763, "step": 1198 }, { "epoch": 0.1689327227897147, "grad_norm": 1.0958514111199784, "learning_rate": 1.9004590846391988e-05, "loss": 0.9871, "step": 1199 }, { "epoch": 0.16907361747094046, "grad_norm": 1.5169540600527378, "learning_rate": 1.9002605006700763e-05, "loss": 0.5645, "step": 1200 }, { "epoch": 0.16921451215216626, "grad_norm": 1.2801981229163186, "learning_rate": 1.9000617292074564e-05, "loss": 0.4979, "step": 1201 }, { "epoch": 0.16935540683339204, "grad_norm": 1.3693867057863893, "learning_rate": 1.8998627702927366e-05, "loss": 0.5672, "step": 1202 }, { "epoch": 0.16949630151461784, "grad_norm": 1.3160602399527443, "learning_rate": 1.8996636239673528e-05, "loss": 0.5025, "step": 1203 }, { "epoch": 0.1696371961958436, "grad_norm": 1.122603314387253, "learning_rate": 1.8994642902727807e-05, "loss": 0.9439, "step": 1204 }, { "epoch": 0.16977809087706938, "grad_norm": 1.0467072074862025, "learning_rate": 1.8992647692505346e-05, "loss": 0.9307, "step": 1205 }, { "epoch": 0.16991898555829518, "grad_norm": 1.4392122090448205, "learning_rate": 1.899065060942168e-05, "loss": 0.5461, "step": 1206 }, { "epoch": 0.17005988023952096, "grad_norm": 1.3365960543790183, "learning_rate": 1.8988651653892728e-05, "loss": 0.473, "step": 1207 }, { "epoch": 0.17020077492074673, "grad_norm": 1.1170683670379893, "learning_rate": 1.8986650826334815e-05, "loss": 0.9343, "step": 1208 }, { "epoch": 0.17034166960197253, "grad_norm": 1.3462192987920107, "learning_rate": 1.898464812716464e-05, "loss": 0.4858, "step": 1209 }, { "epoch": 0.1704825642831983, "grad_norm": 1.067239770975429, "learning_rate": 1.8982643556799288e-05, "loss": 0.8741, "step": 1210 }, { "epoch": 0.1706234589644241, "grad_norm": 1.2876929373041042, "learning_rate": 1.8980637115656254e-05, "loss": 0.5432, "step": 1211 }, { "epoch": 0.17076435364564987, "grad_norm": 0.9843424085526304, "learning_rate": 1.8978628804153403e-05, "loss": 0.9058, "step": 1212 }, { "epoch": 0.17090524832687565, "grad_norm": 1.185961079512892, "learning_rate": 1.8976618622709007e-05, "loss": 0.9593, "step": 1213 }, { "epoch": 0.17104614300810145, "grad_norm": 1.0914010976207895, "learning_rate": 1.897460657174171e-05, "loss": 0.9975, "step": 1214 }, { "epoch": 0.17118703768932722, "grad_norm": 1.0365851795595338, "learning_rate": 1.8972592651670554e-05, "loss": 0.9384, "step": 1215 }, { "epoch": 0.17132793237055302, "grad_norm": 0.9235184728223094, "learning_rate": 1.8970576862914976e-05, "loss": 0.8821, "step": 1216 }, { "epoch": 0.1714688270517788, "grad_norm": 2.604492007638245, "learning_rate": 1.896855920589479e-05, "loss": 0.5089, "step": 1217 }, { "epoch": 0.17160972173300457, "grad_norm": 1.3243531320761923, "learning_rate": 1.896653968103021e-05, "loss": 0.5337, "step": 1218 }, { "epoch": 0.17175061641423037, "grad_norm": 1.2003192691553, "learning_rate": 1.8964518288741826e-05, "loss": 0.4895, "step": 1219 }, { "epoch": 0.17189151109545614, "grad_norm": 1.0360173407990674, "learning_rate": 1.8962495029450634e-05, "loss": 0.8864, "step": 1220 }, { "epoch": 0.17203240577668194, "grad_norm": 1.1565128489220802, "learning_rate": 1.8960469903578e-05, "loss": 0.9565, "step": 1221 }, { "epoch": 0.1721733004579077, "grad_norm": 1.279900356195116, "learning_rate": 1.89584429115457e-05, "loss": 0.5531, "step": 1222 }, { "epoch": 0.17231419513913349, "grad_norm": 1.1163701741471674, "learning_rate": 1.895641405377588e-05, "loss": 0.9462, "step": 1223 }, { "epoch": 0.1724550898203593, "grad_norm": 1.4658140794208194, "learning_rate": 1.895438333069108e-05, "loss": 0.546, "step": 1224 }, { "epoch": 0.17259598450158506, "grad_norm": 1.2022523195142074, "learning_rate": 1.8952350742714235e-05, "loss": 0.9754, "step": 1225 }, { "epoch": 0.17273687918281086, "grad_norm": 1.37624861257808, "learning_rate": 1.895031629026866e-05, "loss": 0.4148, "step": 1226 }, { "epoch": 0.17287777386403663, "grad_norm": 1.078007886051029, "learning_rate": 1.894827997377807e-05, "loss": 0.9789, "step": 1227 }, { "epoch": 0.1730186685452624, "grad_norm": 1.2549350647695539, "learning_rate": 1.8946241793666548e-05, "loss": 0.5775, "step": 1228 }, { "epoch": 0.1731595632264882, "grad_norm": 0.9340032601539342, "learning_rate": 1.8944201750358587e-05, "loss": 0.8855, "step": 1229 }, { "epoch": 0.17330045790771398, "grad_norm": 1.055164829967788, "learning_rate": 1.894215984427905e-05, "loss": 0.9884, "step": 1230 }, { "epoch": 0.17344135258893978, "grad_norm": 1.0353614335833996, "learning_rate": 1.8940116075853202e-05, "loss": 0.986, "step": 1231 }, { "epoch": 0.17358224727016555, "grad_norm": 1.0906583597429584, "learning_rate": 1.8938070445506695e-05, "loss": 0.8895, "step": 1232 }, { "epoch": 0.17372314195139132, "grad_norm": 0.9783853977555572, "learning_rate": 1.893602295366555e-05, "loss": 0.9523, "step": 1233 }, { "epoch": 0.17386403663261712, "grad_norm": 1.129504578919577, "learning_rate": 1.8933973600756198e-05, "loss": 0.9266, "step": 1234 }, { "epoch": 0.1740049313138429, "grad_norm": 1.0047549465572139, "learning_rate": 1.8931922387205453e-05, "loss": 0.9634, "step": 1235 }, { "epoch": 0.1741458259950687, "grad_norm": 0.9272521048849484, "learning_rate": 1.8929869313440502e-05, "loss": 0.9306, "step": 1236 }, { "epoch": 0.17428672067629447, "grad_norm": 1.2709055105347473, "learning_rate": 1.892781437988894e-05, "loss": 0.4682, "step": 1237 }, { "epoch": 0.17442761535752024, "grad_norm": 1.2274970031366017, "learning_rate": 1.892575758697874e-05, "loss": 0.8263, "step": 1238 }, { "epoch": 0.17456851003874604, "grad_norm": 1.142788559156352, "learning_rate": 1.892369893513825e-05, "loss": 0.9335, "step": 1239 }, { "epoch": 0.17470940471997182, "grad_norm": 0.992010860621681, "learning_rate": 1.8921638424796228e-05, "loss": 0.9267, "step": 1240 }, { "epoch": 0.17485029940119762, "grad_norm": 0.9943334195438611, "learning_rate": 1.8919576056381805e-05, "loss": 0.935, "step": 1241 }, { "epoch": 0.1749911940824234, "grad_norm": 0.9854340420708857, "learning_rate": 1.8917511830324494e-05, "loss": 0.8861, "step": 1242 }, { "epoch": 0.17513208876364916, "grad_norm": 1.0876167508425827, "learning_rate": 1.8915445747054218e-05, "loss": 0.9554, "step": 1243 }, { "epoch": 0.17527298344487496, "grad_norm": 1.0024872943252436, "learning_rate": 1.891337780700126e-05, "loss": 0.9483, "step": 1244 }, { "epoch": 0.17541387812610074, "grad_norm": 1.2763203333667086, "learning_rate": 1.8911308010596302e-05, "loss": 0.5436, "step": 1245 }, { "epoch": 0.17555477280732654, "grad_norm": 1.0425432754745545, "learning_rate": 1.8909236358270417e-05, "loss": 0.9837, "step": 1246 }, { "epoch": 0.1756956674885523, "grad_norm": 1.0659713605690504, "learning_rate": 1.890716285045506e-05, "loss": 0.9723, "step": 1247 }, { "epoch": 0.17583656216977808, "grad_norm": 1.3814263395635582, "learning_rate": 1.8905087487582062e-05, "loss": 0.5243, "step": 1248 }, { "epoch": 0.17597745685100388, "grad_norm": 1.0055924388424098, "learning_rate": 1.8903010270083664e-05, "loss": 0.9621, "step": 1249 }, { "epoch": 0.17611835153222966, "grad_norm": 1.074961189376706, "learning_rate": 1.8900931198392466e-05, "loss": 0.9557, "step": 1250 }, { "epoch": 0.17625924621345543, "grad_norm": 0.9885769209775357, "learning_rate": 1.8898850272941476e-05, "loss": 0.8592, "step": 1251 }, { "epoch": 0.17640014089468123, "grad_norm": 0.9675183480874228, "learning_rate": 1.8896767494164078e-05, "loss": 0.9018, "step": 1252 }, { "epoch": 0.176541035575907, "grad_norm": 1.491353961274918, "learning_rate": 1.8894682862494044e-05, "loss": 0.5881, "step": 1253 }, { "epoch": 0.1766819302571328, "grad_norm": 1.031194134123666, "learning_rate": 1.8892596378365526e-05, "loss": 0.9836, "step": 1254 }, { "epoch": 0.17682282493835857, "grad_norm": 1.1088011868332337, "learning_rate": 1.8890508042213076e-05, "loss": 0.9262, "step": 1255 }, { "epoch": 0.17696371961958435, "grad_norm": 1.076152416720591, "learning_rate": 1.8888417854471618e-05, "loss": 0.9321, "step": 1256 }, { "epoch": 0.17710461430081015, "grad_norm": 1.0501577626319163, "learning_rate": 1.888632581557647e-05, "loss": 0.9504, "step": 1257 }, { "epoch": 0.17724550898203592, "grad_norm": 1.1504039396234034, "learning_rate": 1.8884231925963326e-05, "loss": 0.9821, "step": 1258 }, { "epoch": 0.17738640366326172, "grad_norm": 1.2033762114169946, "learning_rate": 1.8882136186068275e-05, "loss": 0.9504, "step": 1259 }, { "epoch": 0.1775272983444875, "grad_norm": 1.4423562421557583, "learning_rate": 1.8880038596327793e-05, "loss": 0.5043, "step": 1260 }, { "epoch": 0.17766819302571327, "grad_norm": 1.058813586270665, "learning_rate": 1.8877939157178728e-05, "loss": 0.9219, "step": 1261 }, { "epoch": 0.17780908770693907, "grad_norm": 1.0168872052395441, "learning_rate": 1.8875837869058326e-05, "loss": 0.9423, "step": 1262 }, { "epoch": 0.17794998238816484, "grad_norm": 1.3168856311988149, "learning_rate": 1.8873734732404214e-05, "loss": 0.5583, "step": 1263 }, { "epoch": 0.17809087706939064, "grad_norm": 1.085293227915171, "learning_rate": 1.88716297476544e-05, "loss": 0.9682, "step": 1264 }, { "epoch": 0.1782317717506164, "grad_norm": 1.1775102688787116, "learning_rate": 1.8869522915247285e-05, "loss": 0.9741, "step": 1265 }, { "epoch": 0.1783726664318422, "grad_norm": 1.2410530750453366, "learning_rate": 1.8867414235621645e-05, "loss": 0.9069, "step": 1266 }, { "epoch": 0.178513561113068, "grad_norm": 1.0250731948959633, "learning_rate": 1.886530370921665e-05, "loss": 0.9518, "step": 1267 }, { "epoch": 0.17865445579429376, "grad_norm": 1.3887647462593635, "learning_rate": 1.8863191336471852e-05, "loss": 0.6563, "step": 1268 }, { "epoch": 0.17879535047551956, "grad_norm": 1.130218330459757, "learning_rate": 1.886107711782718e-05, "loss": 0.9352, "step": 1269 }, { "epoch": 0.17893624515674533, "grad_norm": 0.9848421343732172, "learning_rate": 1.8858961053722956e-05, "loss": 0.961, "step": 1270 }, { "epoch": 0.1790771398379711, "grad_norm": 0.9693430361456913, "learning_rate": 1.8856843144599888e-05, "loss": 1.0205, "step": 1271 }, { "epoch": 0.1792180345191969, "grad_norm": 1.0966394449267194, "learning_rate": 1.8854723390899058e-05, "loss": 0.8992, "step": 1272 }, { "epoch": 0.17935892920042268, "grad_norm": 1.3553046679941323, "learning_rate": 1.8852601793061945e-05, "loss": 0.5557, "step": 1273 }, { "epoch": 0.17949982388164848, "grad_norm": 1.175632855415253, "learning_rate": 1.8850478351530398e-05, "loss": 0.467, "step": 1274 }, { "epoch": 0.17964071856287425, "grad_norm": 1.5204841387376462, "learning_rate": 1.884835306674666e-05, "loss": 0.7501, "step": 1275 }, { "epoch": 0.17978161324410002, "grad_norm": 1.2000376877828403, "learning_rate": 1.8846225939153355e-05, "loss": 0.9849, "step": 1276 }, { "epoch": 0.17992250792532583, "grad_norm": 1.3539964725932443, "learning_rate": 1.8844096969193496e-05, "loss": 0.5571, "step": 1277 }, { "epoch": 0.1800634026065516, "grad_norm": 1.1157656554260267, "learning_rate": 1.8841966157310468e-05, "loss": 0.9502, "step": 1278 }, { "epoch": 0.1802042972877774, "grad_norm": 0.9772977154999835, "learning_rate": 1.8839833503948045e-05, "loss": 0.914, "step": 1279 }, { "epoch": 0.18034519196900317, "grad_norm": 1.008188972760195, "learning_rate": 1.8837699009550396e-05, "loss": 0.9251, "step": 1280 }, { "epoch": 0.18048608665022894, "grad_norm": 1.148299337803779, "learning_rate": 1.8835562674562053e-05, "loss": 0.9632, "step": 1281 }, { "epoch": 0.18062698133145474, "grad_norm": 1.0556580992324294, "learning_rate": 1.8833424499427946e-05, "loss": 0.9288, "step": 1282 }, { "epoch": 0.18076787601268052, "grad_norm": 1.061878204468105, "learning_rate": 1.8831284484593383e-05, "loss": 0.9405, "step": 1283 }, { "epoch": 0.18090877069390632, "grad_norm": 1.6106531672412312, "learning_rate": 1.882914263050406e-05, "loss": 0.736, "step": 1284 }, { "epoch": 0.1810496653751321, "grad_norm": 1.2412533039162708, "learning_rate": 1.8826998937606044e-05, "loss": 0.4559, "step": 1285 }, { "epoch": 0.18119056005635786, "grad_norm": 1.0834858173919, "learning_rate": 1.88248534063458e-05, "loss": 0.9935, "step": 1286 }, { "epoch": 0.18133145473758366, "grad_norm": 1.318413455045376, "learning_rate": 1.8822706037170162e-05, "loss": 0.5456, "step": 1287 }, { "epoch": 0.18147234941880944, "grad_norm": 1.4241342856501669, "learning_rate": 1.882055683052636e-05, "loss": 0.5996, "step": 1288 }, { "epoch": 0.18161324410003524, "grad_norm": 0.9718383501351702, "learning_rate": 1.8818405786861996e-05, "loss": 0.8956, "step": 1289 }, { "epoch": 0.181754138781261, "grad_norm": 1.2528719196567015, "learning_rate": 1.8816252906625063e-05, "loss": 0.5435, "step": 1290 }, { "epoch": 0.18189503346248678, "grad_norm": 1.105382033002499, "learning_rate": 1.8814098190263933e-05, "loss": 0.9137, "step": 1291 }, { "epoch": 0.18203592814371258, "grad_norm": 1.2172012028180348, "learning_rate": 1.8811941638227353e-05, "loss": 0.5532, "step": 1292 }, { "epoch": 0.18217682282493836, "grad_norm": 1.1020144942631478, "learning_rate": 1.8809783250964462e-05, "loss": 0.9337, "step": 1293 }, { "epoch": 0.18231771750616413, "grad_norm": 1.307940442802597, "learning_rate": 1.880762302892478e-05, "loss": 0.5425, "step": 1294 }, { "epoch": 0.18245861218738993, "grad_norm": 1.2077766075715228, "learning_rate": 1.880546097255821e-05, "loss": 0.5289, "step": 1295 }, { "epoch": 0.1825995068686157, "grad_norm": 0.9423834308028162, "learning_rate": 1.8803297082315026e-05, "loss": 0.8869, "step": 1296 }, { "epoch": 0.1827404015498415, "grad_norm": 1.2633680419863225, "learning_rate": 1.88011313586459e-05, "loss": 0.9437, "step": 1297 }, { "epoch": 0.18288129623106728, "grad_norm": 1.2020317876903195, "learning_rate": 1.879896380200188e-05, "loss": 0.9897, "step": 1298 }, { "epoch": 0.18302219091229305, "grad_norm": 1.07742999624939, "learning_rate": 1.8796794412834385e-05, "loss": 0.9699, "step": 1299 }, { "epoch": 0.18316308559351885, "grad_norm": 1.012252566660764, "learning_rate": 1.879462319159523e-05, "loss": 0.9368, "step": 1300 }, { "epoch": 0.18330398027474462, "grad_norm": 1.1956294336749453, "learning_rate": 1.8792450138736607e-05, "loss": 0.9086, "step": 1301 }, { "epoch": 0.18344487495597042, "grad_norm": 1.2272910009035858, "learning_rate": 1.8790275254711088e-05, "loss": 0.9417, "step": 1302 }, { "epoch": 0.1835857696371962, "grad_norm": 0.9835880711217463, "learning_rate": 1.8788098539971623e-05, "loss": 0.8927, "step": 1303 }, { "epoch": 0.18372666431842197, "grad_norm": 1.338654466885129, "learning_rate": 1.8785919994971556e-05, "loss": 0.5945, "step": 1304 }, { "epoch": 0.18386755899964777, "grad_norm": 1.1220129894106716, "learning_rate": 1.8783739620164592e-05, "loss": 0.9283, "step": 1305 }, { "epoch": 0.18400845368087354, "grad_norm": 0.9952058530907196, "learning_rate": 1.878155741600484e-05, "loss": 0.9509, "step": 1306 }, { "epoch": 0.18414934836209934, "grad_norm": 1.4089658359731017, "learning_rate": 1.877937338294677e-05, "loss": 0.6486, "step": 1307 }, { "epoch": 0.1842902430433251, "grad_norm": 0.9683234310270502, "learning_rate": 1.8777187521445244e-05, "loss": 0.9454, "step": 1308 }, { "epoch": 0.1844311377245509, "grad_norm": 1.307455560145662, "learning_rate": 1.8774999831955506e-05, "loss": 0.5325, "step": 1309 }, { "epoch": 0.1845720324057767, "grad_norm": 1.3925670480579344, "learning_rate": 1.877281031493317e-05, "loss": 0.5615, "step": 1310 }, { "epoch": 0.18471292708700246, "grad_norm": 0.9785054928711507, "learning_rate": 1.8770618970834245e-05, "loss": 0.9516, "step": 1311 }, { "epoch": 0.18485382176822826, "grad_norm": 1.3355895587969489, "learning_rate": 1.8768425800115108e-05, "loss": 0.5694, "step": 1312 }, { "epoch": 0.18499471644945403, "grad_norm": 1.2047035365406313, "learning_rate": 1.876623080323252e-05, "loss": 0.5664, "step": 1313 }, { "epoch": 0.1851356111306798, "grad_norm": 1.0668476011599413, "learning_rate": 1.8764033980643632e-05, "loss": 0.8714, "step": 1314 }, { "epoch": 0.1852765058119056, "grad_norm": 1.515959433319865, "learning_rate": 1.876183533280596e-05, "loss": 0.5658, "step": 1315 }, { "epoch": 0.18541740049313138, "grad_norm": 1.3171116221703836, "learning_rate": 1.875963486017741e-05, "loss": 0.5974, "step": 1316 }, { "epoch": 0.18555829517435718, "grad_norm": 1.1732696155399698, "learning_rate": 1.8757432563216264e-05, "loss": 0.9482, "step": 1317 }, { "epoch": 0.18569918985558295, "grad_norm": 1.0138129443380859, "learning_rate": 1.8755228442381184e-05, "loss": 0.9519, "step": 1318 }, { "epoch": 0.18584008453680873, "grad_norm": 1.338095245125289, "learning_rate": 1.8753022498131218e-05, "loss": 0.5518, "step": 1319 }, { "epoch": 0.18598097921803453, "grad_norm": 1.0326654007526528, "learning_rate": 1.8750814730925782e-05, "loss": 0.9439, "step": 1320 }, { "epoch": 0.1861218738992603, "grad_norm": 1.2791153762572927, "learning_rate": 1.8748605141224684e-05, "loss": 0.9709, "step": 1321 }, { "epoch": 0.1862627685804861, "grad_norm": 1.005465443038928, "learning_rate": 1.8746393729488103e-05, "loss": 0.9803, "step": 1322 }, { "epoch": 0.18640366326171187, "grad_norm": 0.9781542330135374, "learning_rate": 1.8744180496176604e-05, "loss": 0.9254, "step": 1323 }, { "epoch": 0.18654455794293764, "grad_norm": 0.9439984083252958, "learning_rate": 1.8741965441751123e-05, "loss": 0.9116, "step": 1324 }, { "epoch": 0.18668545262416344, "grad_norm": 1.2289935028199914, "learning_rate": 1.873974856667298e-05, "loss": 0.5088, "step": 1325 }, { "epoch": 0.18682634730538922, "grad_norm": 1.0229441369728638, "learning_rate": 1.8737529871403878e-05, "loss": 0.9679, "step": 1326 }, { "epoch": 0.18696724198661502, "grad_norm": 1.320916070897225, "learning_rate": 1.87353093564059e-05, "loss": 0.5177, "step": 1327 }, { "epoch": 0.1871081366678408, "grad_norm": 1.3064241756133281, "learning_rate": 1.873308702214149e-05, "loss": 0.6282, "step": 1328 }, { "epoch": 0.18724903134906656, "grad_norm": 1.1347695317235975, "learning_rate": 1.8730862869073493e-05, "loss": 0.9888, "step": 1329 }, { "epoch": 0.18738992603029236, "grad_norm": 1.0209927105574594, "learning_rate": 1.8728636897665125e-05, "loss": 0.9681, "step": 1330 }, { "epoch": 0.18753082071151814, "grad_norm": 1.0833596487764476, "learning_rate": 1.872640910837998e-05, "loss": 0.9314, "step": 1331 }, { "epoch": 0.18767171539274394, "grad_norm": 1.0290630266091605, "learning_rate": 1.8724179501682024e-05, "loss": 0.8958, "step": 1332 }, { "epoch": 0.1878126100739697, "grad_norm": 1.3850073327887613, "learning_rate": 1.8721948078035613e-05, "loss": 0.6633, "step": 1333 }, { "epoch": 0.18795350475519548, "grad_norm": 1.1024191506938756, "learning_rate": 1.8719714837905477e-05, "loss": 0.9544, "step": 1334 }, { "epoch": 0.18809439943642128, "grad_norm": 1.0107029834508874, "learning_rate": 1.8717479781756723e-05, "loss": 0.8568, "step": 1335 }, { "epoch": 0.18823529411764706, "grad_norm": 1.4317765908285123, "learning_rate": 1.8715242910054833e-05, "loss": 0.671, "step": 1336 }, { "epoch": 0.18837618879887283, "grad_norm": 1.1721938239743743, "learning_rate": 1.8713004223265674e-05, "loss": 0.9472, "step": 1337 }, { "epoch": 0.18851708348009863, "grad_norm": 0.9708447553439694, "learning_rate": 1.8710763721855487e-05, "loss": 0.919, "step": 1338 }, { "epoch": 0.1886579781613244, "grad_norm": 1.1902922734227634, "learning_rate": 1.8708521406290893e-05, "loss": 0.9435, "step": 1339 }, { "epoch": 0.1887988728425502, "grad_norm": 1.4206769804277124, "learning_rate": 1.8706277277038888e-05, "loss": 0.5646, "step": 1340 }, { "epoch": 0.18893976752377598, "grad_norm": 1.1307040217002289, "learning_rate": 1.870403133456685e-05, "loss": 0.9495, "step": 1341 }, { "epoch": 0.18908066220500175, "grad_norm": 1.0597048654314198, "learning_rate": 1.870178357934253e-05, "loss": 0.9846, "step": 1342 }, { "epoch": 0.18922155688622755, "grad_norm": 1.37457675415607, "learning_rate": 1.869953401183406e-05, "loss": 0.5947, "step": 1343 }, { "epoch": 0.18936245156745332, "grad_norm": 1.0267848568103821, "learning_rate": 1.8697282632509946e-05, "loss": 0.9847, "step": 1344 }, { "epoch": 0.18950334624867912, "grad_norm": 1.2466727998023905, "learning_rate": 1.8695029441839076e-05, "loss": 0.5234, "step": 1345 }, { "epoch": 0.1896442409299049, "grad_norm": 1.2278889771462642, "learning_rate": 1.869277444029071e-05, "loss": 0.9711, "step": 1346 }, { "epoch": 0.18978513561113067, "grad_norm": 1.3145097373870058, "learning_rate": 1.869051762833449e-05, "loss": 0.5029, "step": 1347 }, { "epoch": 0.18992603029235647, "grad_norm": 1.3879849924268983, "learning_rate": 1.8688259006440432e-05, "loss": 0.6738, "step": 1348 }, { "epoch": 0.19006692497358224, "grad_norm": 1.4325648646296543, "learning_rate": 1.868599857507893e-05, "loss": 0.5447, "step": 1349 }, { "epoch": 0.19020781965480804, "grad_norm": 1.1641173868815191, "learning_rate": 1.8683736334720753e-05, "loss": 1.0322, "step": 1350 }, { "epoch": 0.19034871433603381, "grad_norm": 1.3427777691094858, "learning_rate": 1.868147228583705e-05, "loss": 0.5441, "step": 1351 }, { "epoch": 0.1904896090172596, "grad_norm": 1.1417287446559719, "learning_rate": 1.8679206428899346e-05, "loss": 1.0085, "step": 1352 }, { "epoch": 0.1906305036984854, "grad_norm": 1.3157118650015083, "learning_rate": 1.867693876437954e-05, "loss": 0.4331, "step": 1353 }, { "epoch": 0.19077139837971116, "grad_norm": 1.0581559279077732, "learning_rate": 1.867466929274991e-05, "loss": 0.9349, "step": 1354 }, { "epoch": 0.19091229306093696, "grad_norm": 1.414141637123825, "learning_rate": 1.8672398014483103e-05, "loss": 0.5966, "step": 1355 }, { "epoch": 0.19105318774216273, "grad_norm": 1.0899820304334296, "learning_rate": 1.867012493005216e-05, "loss": 0.9742, "step": 1356 }, { "epoch": 0.1911940824233885, "grad_norm": 0.9899242144368154, "learning_rate": 1.8667850039930476e-05, "loss": 0.9816, "step": 1357 }, { "epoch": 0.1913349771046143, "grad_norm": 1.0139668484107431, "learning_rate": 1.8665573344591843e-05, "loss": 0.9206, "step": 1358 }, { "epoch": 0.19147587178584008, "grad_norm": 1.142274172315596, "learning_rate": 1.866329484451041e-05, "loss": 0.9705, "step": 1359 }, { "epoch": 0.19161676646706588, "grad_norm": 1.051494083681986, "learning_rate": 1.8661014540160715e-05, "loss": 0.9554, "step": 1360 }, { "epoch": 0.19175766114829165, "grad_norm": 1.0401552577787758, "learning_rate": 1.865873243201767e-05, "loss": 0.8634, "step": 1361 }, { "epoch": 0.19189855582951743, "grad_norm": 1.0882087404730751, "learning_rate": 1.8656448520556557e-05, "loss": 0.9441, "step": 1362 }, { "epoch": 0.19203945051074323, "grad_norm": 1.0196583770832655, "learning_rate": 1.8654162806253035e-05, "loss": 0.895, "step": 1363 }, { "epoch": 0.192180345191969, "grad_norm": 1.131454822701076, "learning_rate": 1.8651875289583138e-05, "loss": 0.9579, "step": 1364 }, { "epoch": 0.1923212398731948, "grad_norm": 0.9440645344900901, "learning_rate": 1.8649585971023284e-05, "loss": 0.8723, "step": 1365 }, { "epoch": 0.19246213455442057, "grad_norm": 1.056413679586522, "learning_rate": 1.864729485105026e-05, "loss": 0.9252, "step": 1366 }, { "epoch": 0.19260302923564634, "grad_norm": 1.0061208087725262, "learning_rate": 1.8645001930141223e-05, "loss": 0.9511, "step": 1367 }, { "epoch": 0.19274392391687215, "grad_norm": 0.9483891685861567, "learning_rate": 1.8642707208773712e-05, "loss": 0.9093, "step": 1368 }, { "epoch": 0.19288481859809792, "grad_norm": 0.9884883756517255, "learning_rate": 1.864041068742564e-05, "loss": 0.9185, "step": 1369 }, { "epoch": 0.19302571327932372, "grad_norm": 1.4546857906781203, "learning_rate": 1.8638112366575293e-05, "loss": 0.5839, "step": 1370 }, { "epoch": 0.1931666079605495, "grad_norm": 1.1463724717203803, "learning_rate": 1.8635812246701336e-05, "loss": 0.9145, "step": 1371 }, { "epoch": 0.19330750264177526, "grad_norm": 1.5116182736896346, "learning_rate": 1.86335103282828e-05, "loss": 0.5392, "step": 1372 }, { "epoch": 0.19344839732300106, "grad_norm": 1.072605382722027, "learning_rate": 1.86312066117991e-05, "loss": 0.9613, "step": 1373 }, { "epoch": 0.19358929200422684, "grad_norm": 1.0997871850106302, "learning_rate": 1.8628901097730018e-05, "loss": 0.9478, "step": 1374 }, { "epoch": 0.19373018668545264, "grad_norm": 1.1734326890665436, "learning_rate": 1.8626593786555713e-05, "loss": 0.8868, "step": 1375 }, { "epoch": 0.1938710813666784, "grad_norm": 0.9838554170054858, "learning_rate": 1.8624284678756727e-05, "loss": 0.9339, "step": 1376 }, { "epoch": 0.19401197604790418, "grad_norm": 1.4657379450123946, "learning_rate": 1.8621973774813958e-05, "loss": 0.5942, "step": 1377 }, { "epoch": 0.19415287072912998, "grad_norm": 1.1940034708764145, "learning_rate": 1.8619661075208696e-05, "loss": 0.9865, "step": 1378 }, { "epoch": 0.19429376541035576, "grad_norm": 1.4252972232745975, "learning_rate": 1.8617346580422593e-05, "loss": 0.6255, "step": 1379 }, { "epoch": 0.19443466009158153, "grad_norm": 1.1489705571814612, "learning_rate": 1.8615030290937683e-05, "loss": 0.941, "step": 1380 }, { "epoch": 0.19457555477280733, "grad_norm": 1.4459069972266319, "learning_rate": 1.8612712207236367e-05, "loss": 0.526, "step": 1381 }, { "epoch": 0.1947164494540331, "grad_norm": 1.2580203999362616, "learning_rate": 1.8610392329801418e-05, "loss": 1.0327, "step": 1382 }, { "epoch": 0.1948573441352589, "grad_norm": 1.15208155458999, "learning_rate": 1.8608070659115998e-05, "loss": 0.9658, "step": 1383 }, { "epoch": 0.19499823881648468, "grad_norm": 1.2962720891646748, "learning_rate": 1.8605747195663624e-05, "loss": 0.6122, "step": 1384 }, { "epoch": 0.19513913349771045, "grad_norm": 1.349178224016737, "learning_rate": 1.86034219399282e-05, "loss": 0.5505, "step": 1385 }, { "epoch": 0.19528002817893625, "grad_norm": 1.3010401565792207, "learning_rate": 1.8601094892393988e-05, "loss": 0.5576, "step": 1386 }, { "epoch": 0.19542092286016202, "grad_norm": 1.2177444110701319, "learning_rate": 1.8598766053545638e-05, "loss": 0.5406, "step": 1387 }, { "epoch": 0.19556181754138782, "grad_norm": 1.3256735638116988, "learning_rate": 1.859643542386817e-05, "loss": 0.5087, "step": 1388 }, { "epoch": 0.1957027122226136, "grad_norm": 1.191788859348314, "learning_rate": 1.8594103003846973e-05, "loss": 0.9127, "step": 1389 }, { "epoch": 0.19584360690383937, "grad_norm": 1.3082864024259304, "learning_rate": 1.8591768793967807e-05, "loss": 0.5464, "step": 1390 }, { "epoch": 0.19598450158506517, "grad_norm": 1.1912751680183733, "learning_rate": 1.858943279471681e-05, "loss": 0.9297, "step": 1391 }, { "epoch": 0.19612539626629094, "grad_norm": 1.2484602025441434, "learning_rate": 1.8587095006580494e-05, "loss": 0.9775, "step": 1392 }, { "epoch": 0.19626629094751674, "grad_norm": 1.1220623173244313, "learning_rate": 1.8584755430045735e-05, "loss": 1.0005, "step": 1393 }, { "epoch": 0.19640718562874251, "grad_norm": 1.1058102373367513, "learning_rate": 1.8582414065599793e-05, "loss": 0.9252, "step": 1394 }, { "epoch": 0.1965480803099683, "grad_norm": 1.4161833719299328, "learning_rate": 1.8580070913730286e-05, "loss": 0.4927, "step": 1395 }, { "epoch": 0.1966889749911941, "grad_norm": 1.029222326062418, "learning_rate": 1.857772597492522e-05, "loss": 0.8855, "step": 1396 }, { "epoch": 0.19682986967241986, "grad_norm": 1.0835860572049407, "learning_rate": 1.857537924967296e-05, "loss": 0.9452, "step": 1397 }, { "epoch": 0.19697076435364566, "grad_norm": 1.3862469610076327, "learning_rate": 1.8573030738462255e-05, "loss": 0.5141, "step": 1398 }, { "epoch": 0.19711165903487143, "grad_norm": 1.2566772570925755, "learning_rate": 1.8570680441782218e-05, "loss": 0.6031, "step": 1399 }, { "epoch": 0.1972525537160972, "grad_norm": 1.1792115736662505, "learning_rate": 1.856832836012233e-05, "loss": 0.9398, "step": 1400 }, { "epoch": 0.197393448397323, "grad_norm": 1.025259107809762, "learning_rate": 1.856597449397245e-05, "loss": 0.898, "step": 1401 }, { "epoch": 0.19753434307854878, "grad_norm": 1.3269901153328896, "learning_rate": 1.8563618843822813e-05, "loss": 0.4948, "step": 1402 }, { "epoch": 0.19767523775977458, "grad_norm": 1.0498066059997646, "learning_rate": 1.856126141016402e-05, "loss": 0.8964, "step": 1403 }, { "epoch": 0.19781613244100035, "grad_norm": 1.0712267633978976, "learning_rate": 1.8558902193487043e-05, "loss": 0.9433, "step": 1404 }, { "epoch": 0.19795702712222613, "grad_norm": 1.417715977697612, "learning_rate": 1.8556541194283222e-05, "loss": 0.6145, "step": 1405 }, { "epoch": 0.19809792180345193, "grad_norm": 1.1119555070405238, "learning_rate": 1.8554178413044276e-05, "loss": 0.8816, "step": 1406 }, { "epoch": 0.1982388164846777, "grad_norm": 1.0979983226521588, "learning_rate": 1.8551813850262294e-05, "loss": 0.991, "step": 1407 }, { "epoch": 0.1983797111659035, "grad_norm": 1.0278104313228904, "learning_rate": 1.8549447506429728e-05, "loss": 0.9663, "step": 1408 }, { "epoch": 0.19852060584712927, "grad_norm": 1.130231950588619, "learning_rate": 1.8547079382039408e-05, "loss": 0.8799, "step": 1409 }, { "epoch": 0.19866150052835505, "grad_norm": 1.2637889934402995, "learning_rate": 1.854470947758454e-05, "loss": 0.9819, "step": 1410 }, { "epoch": 0.19880239520958085, "grad_norm": 1.027398340360764, "learning_rate": 1.8542337793558685e-05, "loss": 0.9179, "step": 1411 }, { "epoch": 0.19894328989080662, "grad_norm": 1.2684681764538357, "learning_rate": 1.853996433045579e-05, "loss": 0.575, "step": 1412 }, { "epoch": 0.19908418457203242, "grad_norm": 1.0954355306880266, "learning_rate": 1.853758908877016e-05, "loss": 0.9861, "step": 1413 }, { "epoch": 0.1992250792532582, "grad_norm": 0.9542982520161705, "learning_rate": 1.8535212068996483e-05, "loss": 0.9058, "step": 1414 }, { "epoch": 0.19936597393448396, "grad_norm": 1.3688636685443685, "learning_rate": 1.8532833271629812e-05, "loss": 0.9865, "step": 1415 }, { "epoch": 0.19950686861570976, "grad_norm": 1.121703683030739, "learning_rate": 1.8530452697165558e-05, "loss": 0.9151, "step": 1416 }, { "epoch": 0.19964776329693554, "grad_norm": 1.4457082257678122, "learning_rate": 1.8528070346099527e-05, "loss": 0.5489, "step": 1417 }, { "epoch": 0.19978865797816134, "grad_norm": 1.2168036939048548, "learning_rate": 1.8525686218927876e-05, "loss": 0.9293, "step": 1418 }, { "epoch": 0.1999295526593871, "grad_norm": 1.2283699703656508, "learning_rate": 1.8523300316147132e-05, "loss": 0.9524, "step": 1419 }, { "epoch": 0.20007044734061288, "grad_norm": 1.0743353700504985, "learning_rate": 1.8520912638254203e-05, "loss": 0.9573, "step": 1420 }, { "epoch": 0.20021134202183868, "grad_norm": 1.3687068147145423, "learning_rate": 1.8518523185746362e-05, "loss": 0.483, "step": 1421 }, { "epoch": 0.20035223670306446, "grad_norm": 1.4469329375801028, "learning_rate": 1.8516131959121246e-05, "loss": 0.6389, "step": 1422 }, { "epoch": 0.20049313138429023, "grad_norm": 1.3252726769085086, "learning_rate": 1.8513738958876864e-05, "loss": 0.6351, "step": 1423 }, { "epoch": 0.20063402606551603, "grad_norm": 1.2465252240119873, "learning_rate": 1.8511344185511607e-05, "loss": 0.9776, "step": 1424 }, { "epoch": 0.2007749207467418, "grad_norm": 1.2908240077734552, "learning_rate": 1.8508947639524214e-05, "loss": 0.5324, "step": 1425 }, { "epoch": 0.2009158154279676, "grad_norm": 1.0532203389519414, "learning_rate": 1.85065493214138e-05, "loss": 0.9354, "step": 1426 }, { "epoch": 0.20105671010919338, "grad_norm": 1.32649449434131, "learning_rate": 1.8504149231679868e-05, "loss": 0.9414, "step": 1427 }, { "epoch": 0.20119760479041915, "grad_norm": 1.1443235220808536, "learning_rate": 1.8501747370822264e-05, "loss": 0.4165, "step": 1428 }, { "epoch": 0.20133849947164495, "grad_norm": 1.022093188227289, "learning_rate": 1.8499343739341214e-05, "loss": 0.8634, "step": 1429 }, { "epoch": 0.20147939415287072, "grad_norm": 1.0048906583560795, "learning_rate": 1.8496938337737315e-05, "loss": 0.9364, "step": 1430 }, { "epoch": 0.20162028883409652, "grad_norm": 1.2750383814357351, "learning_rate": 1.8494531166511528e-05, "loss": 0.6153, "step": 1431 }, { "epoch": 0.2017611835153223, "grad_norm": 1.4094601807364866, "learning_rate": 1.8492122226165185e-05, "loss": 0.5552, "step": 1432 }, { "epoch": 0.20190207819654807, "grad_norm": 1.0752321419099609, "learning_rate": 1.8489711517199988e-05, "loss": 0.9843, "step": 1433 }, { "epoch": 0.20204297287777387, "grad_norm": 1.257467901808004, "learning_rate": 1.8487299040118003e-05, "loss": 0.6221, "step": 1434 }, { "epoch": 0.20218386755899964, "grad_norm": 1.137860195225475, "learning_rate": 1.8484884795421667e-05, "loss": 0.9739, "step": 1435 }, { "epoch": 0.20232476224022544, "grad_norm": 1.1304397711441472, "learning_rate": 1.8482468783613785e-05, "loss": 0.9694, "step": 1436 }, { "epoch": 0.20246565692145121, "grad_norm": 1.4013148270146847, "learning_rate": 1.848005100519753e-05, "loss": 0.6487, "step": 1437 }, { "epoch": 0.202606551602677, "grad_norm": 0.9571520358636473, "learning_rate": 1.8477631460676438e-05, "loss": 0.9406, "step": 1438 }, { "epoch": 0.2027474462839028, "grad_norm": 1.1426725927646089, "learning_rate": 1.8475210150554424e-05, "loss": 0.9632, "step": 1439 }, { "epoch": 0.20288834096512856, "grad_norm": 1.0837863012978979, "learning_rate": 1.8472787075335766e-05, "loss": 0.9499, "step": 1440 }, { "epoch": 0.20302923564635436, "grad_norm": 1.0171382326650646, "learning_rate": 1.8470362235525098e-05, "loss": 0.9799, "step": 1441 }, { "epoch": 0.20317013032758013, "grad_norm": 1.0785376100400408, "learning_rate": 1.846793563162744e-05, "loss": 0.9759, "step": 1442 }, { "epoch": 0.2033110250088059, "grad_norm": 1.0956946981465427, "learning_rate": 1.8465507264148167e-05, "loss": 0.9139, "step": 1443 }, { "epoch": 0.2034519196900317, "grad_norm": 1.4184313272132625, "learning_rate": 1.8463077133593023e-05, "loss": 0.5797, "step": 1444 }, { "epoch": 0.20359281437125748, "grad_norm": 1.303776209293484, "learning_rate": 1.8460645240468124e-05, "loss": 0.5607, "step": 1445 }, { "epoch": 0.20373370905248328, "grad_norm": 0.9504572724118083, "learning_rate": 1.8458211585279952e-05, "loss": 0.9145, "step": 1446 }, { "epoch": 0.20387460373370905, "grad_norm": 1.058822164727323, "learning_rate": 1.845577616853535e-05, "loss": 0.9268, "step": 1447 }, { "epoch": 0.20401549841493483, "grad_norm": 0.9646608520400957, "learning_rate": 1.8453338990741535e-05, "loss": 0.9043, "step": 1448 }, { "epoch": 0.20415639309616063, "grad_norm": 1.142144917258613, "learning_rate": 1.8450900052406084e-05, "loss": 0.932, "step": 1449 }, { "epoch": 0.2042972877773864, "grad_norm": 1.0350488142090242, "learning_rate": 1.844845935403695e-05, "loss": 0.954, "step": 1450 }, { "epoch": 0.2044381824586122, "grad_norm": 1.0905566446077428, "learning_rate": 1.8446016896142443e-05, "loss": 0.944, "step": 1451 }, { "epoch": 0.20457907713983797, "grad_norm": 1.1302965274229246, "learning_rate": 1.844357267923125e-05, "loss": 0.9121, "step": 1452 }, { "epoch": 0.20471997182106375, "grad_norm": 1.0476549578174608, "learning_rate": 1.8441126703812408e-05, "loss": 0.9688, "step": 1453 }, { "epoch": 0.20486086650228955, "grad_norm": 1.172487118490468, "learning_rate": 1.8438678970395334e-05, "loss": 0.9745, "step": 1454 }, { "epoch": 0.20500176118351532, "grad_norm": 1.1341442060479847, "learning_rate": 1.843622947948981e-05, "loss": 0.9151, "step": 1455 }, { "epoch": 0.20514265586474112, "grad_norm": 0.9780424488610578, "learning_rate": 1.843377823160598e-05, "loss": 0.9218, "step": 1456 }, { "epoch": 0.2052835505459669, "grad_norm": 1.0940647274807571, "learning_rate": 1.843132522725435e-05, "loss": 0.868, "step": 1457 }, { "epoch": 0.20542444522719266, "grad_norm": 1.0403791656526435, "learning_rate": 1.8428870466945808e-05, "loss": 0.9743, "step": 1458 }, { "epoch": 0.20556533990841847, "grad_norm": 1.254436310212664, "learning_rate": 1.842641395119159e-05, "loss": 0.5743, "step": 1459 }, { "epoch": 0.20570623458964424, "grad_norm": 1.0754780397986294, "learning_rate": 1.8423955680503304e-05, "loss": 0.9012, "step": 1460 }, { "epoch": 0.20584712927087004, "grad_norm": 1.3481063900708408, "learning_rate": 1.842149565539292e-05, "loss": 0.54, "step": 1461 }, { "epoch": 0.2059880239520958, "grad_norm": 1.3359326048833233, "learning_rate": 1.8419033876372784e-05, "loss": 0.546, "step": 1462 }, { "epoch": 0.20612891863332158, "grad_norm": 1.098308300365737, "learning_rate": 1.84165703439556e-05, "loss": 0.9017, "step": 1463 }, { "epoch": 0.20626981331454738, "grad_norm": 1.2325434145506866, "learning_rate": 1.8414105058654434e-05, "loss": 0.5435, "step": 1464 }, { "epoch": 0.20641070799577316, "grad_norm": 1.3591419288956683, "learning_rate": 1.841163802098272e-05, "loss": 0.5644, "step": 1465 }, { "epoch": 0.20655160267699893, "grad_norm": 1.1180231059225731, "learning_rate": 1.8409169231454262e-05, "loss": 0.9514, "step": 1466 }, { "epoch": 0.20669249735822473, "grad_norm": 1.1138394309198978, "learning_rate": 1.840669869058322e-05, "loss": 0.9198, "step": 1467 }, { "epoch": 0.2068333920394505, "grad_norm": 1.3697122688105867, "learning_rate": 1.840422639888413e-05, "loss": 0.5844, "step": 1468 }, { "epoch": 0.2069742867206763, "grad_norm": 0.9708574278027363, "learning_rate": 1.8401752356871877e-05, "loss": 0.9049, "step": 1469 }, { "epoch": 0.20711518140190208, "grad_norm": 1.0365596518196094, "learning_rate": 1.8399276565061726e-05, "loss": 0.914, "step": 1470 }, { "epoch": 0.20725607608312785, "grad_norm": 0.9508387498020159, "learning_rate": 1.8396799023969295e-05, "loss": 0.9176, "step": 1471 }, { "epoch": 0.20739697076435365, "grad_norm": 1.2098182048540562, "learning_rate": 1.8394319734110573e-05, "loss": 0.9838, "step": 1472 }, { "epoch": 0.20753786544557942, "grad_norm": 1.0509064027425423, "learning_rate": 1.8391838696001917e-05, "loss": 0.8552, "step": 1473 }, { "epoch": 0.20767876012680522, "grad_norm": 1.3789743591944532, "learning_rate": 1.8389355910160037e-05, "loss": 0.6095, "step": 1474 }, { "epoch": 0.207819654808031, "grad_norm": 0.9971867425193721, "learning_rate": 1.8386871377102015e-05, "loss": 0.9273, "step": 1475 }, { "epoch": 0.20796054948925677, "grad_norm": 1.2403913783105245, "learning_rate": 1.8384385097345284e-05, "loss": 0.9435, "step": 1476 }, { "epoch": 0.20810144417048257, "grad_norm": 1.400474613069089, "learning_rate": 1.8381897071407665e-05, "loss": 0.6026, "step": 1477 }, { "epoch": 0.20824233885170834, "grad_norm": 1.116010026966968, "learning_rate": 1.8379407299807327e-05, "loss": 0.9077, "step": 1478 }, { "epoch": 0.20838323353293414, "grad_norm": 1.116107856870795, "learning_rate": 1.8376915783062794e-05, "loss": 0.9601, "step": 1479 }, { "epoch": 0.20852412821415992, "grad_norm": 1.1470434850107973, "learning_rate": 1.8374422521692974e-05, "loss": 0.9682, "step": 1480 }, { "epoch": 0.2086650228953857, "grad_norm": 1.066695015105291, "learning_rate": 1.837192751621712e-05, "loss": 0.9675, "step": 1481 }, { "epoch": 0.2088059175766115, "grad_norm": 0.966220753070002, "learning_rate": 1.8369430767154867e-05, "loss": 0.9436, "step": 1482 }, { "epoch": 0.20894681225783726, "grad_norm": 1.0406413643262256, "learning_rate": 1.8366932275026193e-05, "loss": 0.9459, "step": 1483 }, { "epoch": 0.20908770693906306, "grad_norm": 1.4078162679528008, "learning_rate": 1.8364432040351454e-05, "loss": 0.5866, "step": 1484 }, { "epoch": 0.20922860162028883, "grad_norm": 1.0413276988950741, "learning_rate": 1.8361930063651358e-05, "loss": 0.9698, "step": 1485 }, { "epoch": 0.2093694963015146, "grad_norm": 0.9390193726383167, "learning_rate": 1.835942634544699e-05, "loss": 0.8477, "step": 1486 }, { "epoch": 0.2095103909827404, "grad_norm": 1.2188756128504916, "learning_rate": 1.835692088625978e-05, "loss": 0.9543, "step": 1487 }, { "epoch": 0.20965128566396618, "grad_norm": 1.0293419950114997, "learning_rate": 1.835441368661153e-05, "loss": 0.9116, "step": 1488 }, { "epoch": 0.20979218034519198, "grad_norm": 1.025726263499662, "learning_rate": 1.8351904747024415e-05, "loss": 0.8932, "step": 1489 }, { "epoch": 0.20993307502641775, "grad_norm": 1.11619048675688, "learning_rate": 1.8349394068020948e-05, "loss": 0.8757, "step": 1490 }, { "epoch": 0.21007396970764353, "grad_norm": 1.0377661729444547, "learning_rate": 1.8346881650124025e-05, "loss": 0.9022, "step": 1491 }, { "epoch": 0.21021486438886933, "grad_norm": 1.287467398400833, "learning_rate": 1.8344367493856895e-05, "loss": 0.5703, "step": 1492 }, { "epoch": 0.2103557590700951, "grad_norm": 0.9921820775536097, "learning_rate": 1.8341851599743166e-05, "loss": 0.9383, "step": 1493 }, { "epoch": 0.2104966537513209, "grad_norm": 0.9178622354365613, "learning_rate": 1.8339333968306824e-05, "loss": 0.9207, "step": 1494 }, { "epoch": 0.21063754843254667, "grad_norm": 1.0572816463050427, "learning_rate": 1.8336814600072196e-05, "loss": 0.905, "step": 1495 }, { "epoch": 0.21077844311377245, "grad_norm": 1.11241646772328, "learning_rate": 1.8334293495563984e-05, "loss": 0.502, "step": 1496 }, { "epoch": 0.21091933779499825, "grad_norm": 1.13098163111864, "learning_rate": 1.8331770655307247e-05, "loss": 0.9025, "step": 1497 }, { "epoch": 0.21106023247622402, "grad_norm": 0.9817735940466723, "learning_rate": 1.8329246079827407e-05, "loss": 0.932, "step": 1498 }, { "epoch": 0.21120112715744982, "grad_norm": 1.2156668196462677, "learning_rate": 1.8326719769650244e-05, "loss": 0.5261, "step": 1499 }, { "epoch": 0.2113420218386756, "grad_norm": 1.1339478445689528, "learning_rate": 1.832419172530191e-05, "loss": 0.9338, "step": 1500 }, { "epoch": 0.21148291651990136, "grad_norm": 1.37539393344596, "learning_rate": 1.8321661947308903e-05, "loss": 0.5432, "step": 1501 }, { "epoch": 0.21162381120112717, "grad_norm": 0.9970448261237843, "learning_rate": 1.831913043619809e-05, "loss": 0.8893, "step": 1502 }, { "epoch": 0.21176470588235294, "grad_norm": 1.3851944849242777, "learning_rate": 1.83165971924967e-05, "loss": 0.5733, "step": 1503 }, { "epoch": 0.21190560056357874, "grad_norm": 1.0388023062674374, "learning_rate": 1.831406221673232e-05, "loss": 0.9426, "step": 1504 }, { "epoch": 0.2120464952448045, "grad_norm": 1.2288393454195423, "learning_rate": 1.83115255094329e-05, "loss": 0.9347, "step": 1505 }, { "epoch": 0.21218738992603028, "grad_norm": 1.2260781912030339, "learning_rate": 1.8308987071126747e-05, "loss": 0.5657, "step": 1506 }, { "epoch": 0.21232828460725608, "grad_norm": 1.2968105458762607, "learning_rate": 1.8306446902342532e-05, "loss": 0.5319, "step": 1507 }, { "epoch": 0.21246917928848186, "grad_norm": 0.9711709510371894, "learning_rate": 1.8303905003609292e-05, "loss": 0.8434, "step": 1508 }, { "epoch": 0.21261007396970763, "grad_norm": 1.2687664136851617, "learning_rate": 1.8301361375456408e-05, "loss": 0.5552, "step": 1509 }, { "epoch": 0.21275096865093343, "grad_norm": 1.0184039003823102, "learning_rate": 1.8298816018413634e-05, "loss": 0.8989, "step": 1510 }, { "epoch": 0.2128918633321592, "grad_norm": 1.125531923535542, "learning_rate": 1.8296268933011083e-05, "loss": 0.4864, "step": 1511 }, { "epoch": 0.213032758013385, "grad_norm": 1.0163476784389576, "learning_rate": 1.8293720119779223e-05, "loss": 0.8842, "step": 1512 }, { "epoch": 0.21317365269461078, "grad_norm": 1.181442191769144, "learning_rate": 1.8291169579248886e-05, "loss": 0.5194, "step": 1513 }, { "epoch": 0.21331454737583655, "grad_norm": 1.227964798424278, "learning_rate": 1.8288617311951263e-05, "loss": 0.9524, "step": 1514 }, { "epoch": 0.21345544205706235, "grad_norm": 1.0430737540923494, "learning_rate": 1.8286063318417903e-05, "loss": 0.8581, "step": 1515 }, { "epoch": 0.21359633673828812, "grad_norm": 1.064023534982832, "learning_rate": 1.8283507599180717e-05, "loss": 0.9625, "step": 1516 }, { "epoch": 0.21373723141951392, "grad_norm": 1.3341659965233252, "learning_rate": 1.8280950154771974e-05, "loss": 0.5192, "step": 1517 }, { "epoch": 0.2138781261007397, "grad_norm": 1.0032868572528018, "learning_rate": 1.82783909857243e-05, "loss": 0.9001, "step": 1518 }, { "epoch": 0.21401902078196547, "grad_norm": 1.0156402584900612, "learning_rate": 1.8275830092570683e-05, "loss": 0.9414, "step": 1519 }, { "epoch": 0.21415991546319127, "grad_norm": 0.9868554477844984, "learning_rate": 1.8273267475844468e-05, "loss": 0.8819, "step": 1520 }, { "epoch": 0.21430081014441704, "grad_norm": 1.0629401431867909, "learning_rate": 1.8270703136079366e-05, "loss": 0.919, "step": 1521 }, { "epoch": 0.21444170482564284, "grad_norm": 1.0279368803357158, "learning_rate": 1.8268137073809438e-05, "loss": 0.9221, "step": 1522 }, { "epoch": 0.21458259950686862, "grad_norm": 0.9925579879467, "learning_rate": 1.8265569289569108e-05, "loss": 0.9625, "step": 1523 }, { "epoch": 0.2147234941880944, "grad_norm": 0.9916132750767347, "learning_rate": 1.8262999783893155e-05, "loss": 0.9303, "step": 1524 }, { "epoch": 0.2148643888693202, "grad_norm": 0.9954513931189708, "learning_rate": 1.8260428557316724e-05, "loss": 0.9434, "step": 1525 }, { "epoch": 0.21500528355054596, "grad_norm": 1.4619581466316325, "learning_rate": 1.8257855610375307e-05, "loss": 0.6032, "step": 1526 }, { "epoch": 0.21514617823177176, "grad_norm": 1.3476428154679427, "learning_rate": 1.8255280943604766e-05, "loss": 0.5283, "step": 1527 }, { "epoch": 0.21528707291299753, "grad_norm": 1.0040873894971505, "learning_rate": 1.8252704557541318e-05, "loss": 0.891, "step": 1528 }, { "epoch": 0.2154279675942233, "grad_norm": 1.094208252646268, "learning_rate": 1.8250126452721527e-05, "loss": 0.8694, "step": 1529 }, { "epoch": 0.2155688622754491, "grad_norm": 0.9721370265406721, "learning_rate": 1.8247546629682335e-05, "loss": 0.9477, "step": 1530 }, { "epoch": 0.21570975695667488, "grad_norm": 0.9875380882503454, "learning_rate": 1.8244965088961024e-05, "loss": 0.8103, "step": 1531 }, { "epoch": 0.21585065163790068, "grad_norm": 1.0536365236195138, "learning_rate": 1.8242381831095244e-05, "loss": 0.9162, "step": 1532 }, { "epoch": 0.21599154631912645, "grad_norm": 0.9462299885932071, "learning_rate": 1.8239796856622997e-05, "loss": 0.9341, "step": 1533 }, { "epoch": 0.21613244100035223, "grad_norm": 0.9675187729816149, "learning_rate": 1.8237210166082645e-05, "loss": 0.9456, "step": 1534 }, { "epoch": 0.21627333568157803, "grad_norm": 0.9164973783874377, "learning_rate": 1.8234621760012913e-05, "loss": 0.8955, "step": 1535 }, { "epoch": 0.2164142303628038, "grad_norm": 1.1540652550549162, "learning_rate": 1.823203163895287e-05, "loss": 0.8885, "step": 1536 }, { "epoch": 0.2165551250440296, "grad_norm": 1.0832881233196907, "learning_rate": 1.8229439803441947e-05, "loss": 0.9302, "step": 1537 }, { "epoch": 0.21669601972525537, "grad_norm": 1.0388446507423739, "learning_rate": 1.8226846254019945e-05, "loss": 0.9141, "step": 1538 }, { "epoch": 0.21683691440648115, "grad_norm": 1.2966724196697668, "learning_rate": 1.822425099122701e-05, "loss": 0.5967, "step": 1539 }, { "epoch": 0.21697780908770695, "grad_norm": 1.3049375880816818, "learning_rate": 1.8221654015603637e-05, "loss": 0.5831, "step": 1540 }, { "epoch": 0.21711870376893272, "grad_norm": 1.0532222473250132, "learning_rate": 1.82190553276907e-05, "loss": 0.912, "step": 1541 }, { "epoch": 0.21725959845015852, "grad_norm": 1.1661030461801072, "learning_rate": 1.8216454928029407e-05, "loss": 0.4816, "step": 1542 }, { "epoch": 0.2174004931313843, "grad_norm": 0.9987452194037203, "learning_rate": 1.8213852817161338e-05, "loss": 0.9962, "step": 1543 }, { "epoch": 0.21754138781261007, "grad_norm": 1.2840802659770565, "learning_rate": 1.821124899562842e-05, "loss": 0.5679, "step": 1544 }, { "epoch": 0.21768228249383587, "grad_norm": 1.0130060177950315, "learning_rate": 1.820864346397294e-05, "loss": 0.9405, "step": 1545 }, { "epoch": 0.21782317717506164, "grad_norm": 1.2813369437638078, "learning_rate": 1.8206036222737546e-05, "loss": 0.5278, "step": 1546 }, { "epoch": 0.2179640718562874, "grad_norm": 1.251410855177681, "learning_rate": 1.8203427272465234e-05, "loss": 0.4495, "step": 1547 }, { "epoch": 0.2181049665375132, "grad_norm": 1.3369467055425523, "learning_rate": 1.820081661369936e-05, "loss": 0.6143, "step": 1548 }, { "epoch": 0.21824586121873898, "grad_norm": 1.0683119567881025, "learning_rate": 1.8198204246983632e-05, "loss": 0.9797, "step": 1549 }, { "epoch": 0.21838675589996479, "grad_norm": 1.375849944300712, "learning_rate": 1.8195590172862123e-05, "loss": 0.645, "step": 1550 }, { "epoch": 0.21852765058119056, "grad_norm": 1.0703631393191517, "learning_rate": 1.819297439187925e-05, "loss": 0.9463, "step": 1551 }, { "epoch": 0.21866854526241633, "grad_norm": 1.1642365516995707, "learning_rate": 1.819035690457979e-05, "loss": 0.9713, "step": 1552 }, { "epoch": 0.21880943994364213, "grad_norm": 1.0018415648977472, "learning_rate": 1.8187737711508882e-05, "loss": 0.9163, "step": 1553 }, { "epoch": 0.2189503346248679, "grad_norm": 1.2513665459661165, "learning_rate": 1.818511681321201e-05, "loss": 0.5521, "step": 1554 }, { "epoch": 0.2190912293060937, "grad_norm": 1.4628331901684435, "learning_rate": 1.818249421023502e-05, "loss": 0.521, "step": 1555 }, { "epoch": 0.21923212398731948, "grad_norm": 1.457794118439038, "learning_rate": 1.8179869903124103e-05, "loss": 0.7106, "step": 1556 }, { "epoch": 0.21937301866854525, "grad_norm": 1.0342302098038336, "learning_rate": 1.817724389242582e-05, "loss": 0.9362, "step": 1557 }, { "epoch": 0.21951391334977105, "grad_norm": 1.0652754876138384, "learning_rate": 1.817461617868708e-05, "loss": 0.9372, "step": 1558 }, { "epoch": 0.21965480803099682, "grad_norm": 0.9410042671199803, "learning_rate": 1.8171986762455145e-05, "loss": 0.9057, "step": 1559 }, { "epoch": 0.21979570271222262, "grad_norm": 1.192239877261138, "learning_rate": 1.816935564427763e-05, "loss": 0.4096, "step": 1560 }, { "epoch": 0.2199365973934484, "grad_norm": 1.0110391938315904, "learning_rate": 1.8166722824702506e-05, "loss": 0.9363, "step": 1561 }, { "epoch": 0.22007749207467417, "grad_norm": 1.1166064240861073, "learning_rate": 1.8164088304278106e-05, "loss": 0.9714, "step": 1562 }, { "epoch": 0.22021838675589997, "grad_norm": 0.993456683401987, "learning_rate": 1.8161452083553103e-05, "loss": 0.9328, "step": 1563 }, { "epoch": 0.22035928143712574, "grad_norm": 0.9924973829998457, "learning_rate": 1.8158814163076534e-05, "loss": 0.8861, "step": 1564 }, { "epoch": 0.22050017611835154, "grad_norm": 0.9778879508892544, "learning_rate": 1.815617454339779e-05, "loss": 0.8324, "step": 1565 }, { "epoch": 0.22064107079957732, "grad_norm": 0.9326938607922675, "learning_rate": 1.8153533225066614e-05, "loss": 0.9402, "step": 1566 }, { "epoch": 0.2207819654808031, "grad_norm": 1.2579479926062116, "learning_rate": 1.81508902086331e-05, "loss": 0.5706, "step": 1567 }, { "epoch": 0.2209228601620289, "grad_norm": 1.0253566972092076, "learning_rate": 1.81482454946477e-05, "loss": 0.9747, "step": 1568 }, { "epoch": 0.22106375484325466, "grad_norm": 1.4070469871062226, "learning_rate": 1.8145599083661212e-05, "loss": 0.61, "step": 1569 }, { "epoch": 0.22120464952448046, "grad_norm": 1.1920911683369415, "learning_rate": 1.8142950976224796e-05, "loss": 0.9371, "step": 1570 }, { "epoch": 0.22134554420570624, "grad_norm": 1.3516327862182105, "learning_rate": 1.8140301172889968e-05, "loss": 0.9032, "step": 1571 }, { "epoch": 0.221486438886932, "grad_norm": 0.9986135648907217, "learning_rate": 1.8137649674208585e-05, "loss": 0.9179, "step": 1572 }, { "epoch": 0.2216273335681578, "grad_norm": 1.1682695503042573, "learning_rate": 1.8134996480732866e-05, "loss": 0.9804, "step": 1573 }, { "epoch": 0.22176822824938358, "grad_norm": 0.9991261835263899, "learning_rate": 1.8132341593015377e-05, "loss": 0.9537, "step": 1574 }, { "epoch": 0.22190912293060938, "grad_norm": 1.1316777996444218, "learning_rate": 1.8129685011609047e-05, "loss": 0.9674, "step": 1575 }, { "epoch": 0.22205001761183515, "grad_norm": 1.0327302622717938, "learning_rate": 1.8127026737067144e-05, "loss": 0.9723, "step": 1576 }, { "epoch": 0.22219091229306093, "grad_norm": 1.1040169311069234, "learning_rate": 1.8124366769943302e-05, "loss": 0.9357, "step": 1577 }, { "epoch": 0.22233180697428673, "grad_norm": 1.0628116375604657, "learning_rate": 1.81217051107915e-05, "loss": 0.939, "step": 1578 }, { "epoch": 0.2224727016555125, "grad_norm": 1.0778851724086638, "learning_rate": 1.8119041760166065e-05, "loss": 0.4578, "step": 1579 }, { "epoch": 0.2226135963367383, "grad_norm": 0.9665812748131316, "learning_rate": 1.8116376718621688e-05, "loss": 0.9274, "step": 1580 }, { "epoch": 0.22275449101796407, "grad_norm": 1.086285286933484, "learning_rate": 1.81137099867134e-05, "loss": 0.9084, "step": 1581 }, { "epoch": 0.22289538569918985, "grad_norm": 1.041169441796805, "learning_rate": 1.81110415649966e-05, "loss": 0.9695, "step": 1582 }, { "epoch": 0.22303628038041565, "grad_norm": 1.288034080941784, "learning_rate": 1.8108371454027022e-05, "loss": 0.5611, "step": 1583 }, { "epoch": 0.22317717506164142, "grad_norm": 1.0338096206107292, "learning_rate": 1.8105699654360757e-05, "loss": 0.9753, "step": 1584 }, { "epoch": 0.22331806974286722, "grad_norm": 1.0484937551918752, "learning_rate": 1.8103026166554254e-05, "loss": 0.8849, "step": 1585 }, { "epoch": 0.223458964424093, "grad_norm": 1.1826777802545847, "learning_rate": 1.8100350991164308e-05, "loss": 0.5086, "step": 1586 }, { "epoch": 0.22359985910531877, "grad_norm": 1.0186898714596089, "learning_rate": 1.809767412874807e-05, "loss": 0.9521, "step": 1587 }, { "epoch": 0.22374075378654457, "grad_norm": 1.15861773112525, "learning_rate": 1.809499557986303e-05, "loss": 0.4953, "step": 1588 }, { "epoch": 0.22388164846777034, "grad_norm": 1.0830448595993067, "learning_rate": 1.8092315345067044e-05, "loss": 0.8998, "step": 1589 }, { "epoch": 0.2240225431489961, "grad_norm": 0.991299093421682, "learning_rate": 1.8089633424918317e-05, "loss": 0.8956, "step": 1590 }, { "epoch": 0.2241634378302219, "grad_norm": 1.131643993894173, "learning_rate": 1.8086949819975393e-05, "loss": 0.9481, "step": 1591 }, { "epoch": 0.22430433251144768, "grad_norm": 1.0959633529466082, "learning_rate": 1.808426453079718e-05, "loss": 0.9548, "step": 1592 }, { "epoch": 0.22444522719267349, "grad_norm": 0.8980410073476857, "learning_rate": 1.8081577557942937e-05, "loss": 0.9149, "step": 1593 }, { "epoch": 0.22458612187389926, "grad_norm": 1.4106355896735974, "learning_rate": 1.8078888901972254e-05, "loss": 0.5307, "step": 1594 }, { "epoch": 0.22472701655512503, "grad_norm": 1.3761769337155627, "learning_rate": 1.8076198563445103e-05, "loss": 0.5882, "step": 1595 }, { "epoch": 0.22486791123635083, "grad_norm": 0.9123432250931839, "learning_rate": 1.807350654292178e-05, "loss": 0.8689, "step": 1596 }, { "epoch": 0.2250088059175766, "grad_norm": 1.255412527463865, "learning_rate": 1.807081284096294e-05, "loss": 0.5818, "step": 1597 }, { "epoch": 0.2251497005988024, "grad_norm": 1.2640747691483467, "learning_rate": 1.806811745812959e-05, "loss": 0.5543, "step": 1598 }, { "epoch": 0.22529059528002818, "grad_norm": 1.0933783196131075, "learning_rate": 1.806542039498309e-05, "loss": 0.9934, "step": 1599 }, { "epoch": 0.22543148996125395, "grad_norm": 1.506574663702151, "learning_rate": 1.8062721652085146e-05, "loss": 0.522, "step": 1600 }, { "epoch": 0.22557238464247975, "grad_norm": 1.2480264375063956, "learning_rate": 1.806002122999781e-05, "loss": 0.9427, "step": 1601 }, { "epoch": 0.22571327932370552, "grad_norm": 1.0574074272721083, "learning_rate": 1.8057319129283492e-05, "loss": 0.9482, "step": 1602 }, { "epoch": 0.22585417400493132, "grad_norm": 1.4122785859257794, "learning_rate": 1.805461535050494e-05, "loss": 0.5831, "step": 1603 }, { "epoch": 0.2259950686861571, "grad_norm": 1.5700840949865575, "learning_rate": 1.8051909894225266e-05, "loss": 0.5795, "step": 1604 }, { "epoch": 0.22613596336738287, "grad_norm": 1.0773885036364468, "learning_rate": 1.804920276100792e-05, "loss": 0.9493, "step": 1605 }, { "epoch": 0.22627685804860867, "grad_norm": 1.0259177495976533, "learning_rate": 1.8046493951416712e-05, "loss": 0.8975, "step": 1606 }, { "epoch": 0.22641775272983444, "grad_norm": 1.1330321464267505, "learning_rate": 1.8043783466015784e-05, "loss": 0.9617, "step": 1607 }, { "epoch": 0.22655864741106024, "grad_norm": 1.0025083743334013, "learning_rate": 1.8041071305369645e-05, "loss": 0.9432, "step": 1608 }, { "epoch": 0.22669954209228602, "grad_norm": 1.11500746895885, "learning_rate": 1.8038357470043145e-05, "loss": 0.9014, "step": 1609 }, { "epoch": 0.2268404367735118, "grad_norm": 0.9757702815251129, "learning_rate": 1.8035641960601478e-05, "loss": 0.996, "step": 1610 }, { "epoch": 0.2269813314547376, "grad_norm": 1.2006246288575986, "learning_rate": 1.80329247776102e-05, "loss": 0.9594, "step": 1611 }, { "epoch": 0.22712222613596336, "grad_norm": 1.4259723578236358, "learning_rate": 1.8030205921635194e-05, "loss": 0.5106, "step": 1612 }, { "epoch": 0.22726312081718916, "grad_norm": 1.3991566863750557, "learning_rate": 1.802748539324272e-05, "loss": 0.612, "step": 1613 }, { "epoch": 0.22740401549841494, "grad_norm": 1.0490365577416894, "learning_rate": 1.802476319299936e-05, "loss": 0.9473, "step": 1614 }, { "epoch": 0.2275449101796407, "grad_norm": 1.0007969757619315, "learning_rate": 1.802203932147206e-05, "loss": 0.9558, "step": 1615 }, { "epoch": 0.2276858048608665, "grad_norm": 1.3368809023739308, "learning_rate": 1.8019313779228113e-05, "loss": 0.5544, "step": 1616 }, { "epoch": 0.22782669954209228, "grad_norm": 1.4951230068266765, "learning_rate": 1.801658656683515e-05, "loss": 0.5382, "step": 1617 }, { "epoch": 0.22796759422331808, "grad_norm": 1.0440642884306208, "learning_rate": 1.801385768486116e-05, "loss": 0.9397, "step": 1618 }, { "epoch": 0.22810848890454385, "grad_norm": 0.9998965011216159, "learning_rate": 1.8011127133874474e-05, "loss": 0.956, "step": 1619 }, { "epoch": 0.22824938358576963, "grad_norm": 1.237291312517905, "learning_rate": 1.8008394914443772e-05, "loss": 0.9527, "step": 1620 }, { "epoch": 0.22839027826699543, "grad_norm": 0.9766030331313832, "learning_rate": 1.8005661027138085e-05, "loss": 0.8949, "step": 1621 }, { "epoch": 0.2285311729482212, "grad_norm": 1.1029903290932672, "learning_rate": 1.800292547252678e-05, "loss": 0.8842, "step": 1622 }, { "epoch": 0.228672067629447, "grad_norm": 0.8737874370550867, "learning_rate": 1.8000188251179592e-05, "loss": 0.8713, "step": 1623 }, { "epoch": 0.22881296231067277, "grad_norm": 1.0944159038347743, "learning_rate": 1.799744936366658e-05, "loss": 0.9292, "step": 1624 }, { "epoch": 0.22895385699189855, "grad_norm": 1.1079781557206998, "learning_rate": 1.7994708810558168e-05, "loss": 0.9422, "step": 1625 }, { "epoch": 0.22909475167312435, "grad_norm": 1.0232599487502287, "learning_rate": 1.7991966592425116e-05, "loss": 0.9196, "step": 1626 }, { "epoch": 0.22923564635435012, "grad_norm": 1.0534047570875222, "learning_rate": 1.7989222709838534e-05, "loss": 0.9917, "step": 1627 }, { "epoch": 0.22937654103557592, "grad_norm": 0.9405692230111284, "learning_rate": 1.798647716336988e-05, "loss": 0.8963, "step": 1628 }, { "epoch": 0.2295174357168017, "grad_norm": 1.1541950182594851, "learning_rate": 1.7983729953590957e-05, "loss": 0.9047, "step": 1629 }, { "epoch": 0.22965833039802747, "grad_norm": 1.664973433568021, "learning_rate": 1.7980981081073915e-05, "loss": 0.6067, "step": 1630 }, { "epoch": 0.22979922507925327, "grad_norm": 1.14819129607456, "learning_rate": 1.797823054639125e-05, "loss": 0.9566, "step": 1631 }, { "epoch": 0.22994011976047904, "grad_norm": 1.5471390454383438, "learning_rate": 1.7975478350115808e-05, "loss": 0.6082, "step": 1632 }, { "epoch": 0.2300810144417048, "grad_norm": 1.0553261222405586, "learning_rate": 1.797272449282077e-05, "loss": 0.8925, "step": 1633 }, { "epoch": 0.2302219091229306, "grad_norm": 1.0898122563829924, "learning_rate": 1.796996897507968e-05, "loss": 0.8962, "step": 1634 }, { "epoch": 0.23036280380415639, "grad_norm": 0.9984262783702543, "learning_rate": 1.7967211797466405e-05, "loss": 0.8907, "step": 1635 }, { "epoch": 0.23050369848538219, "grad_norm": 1.3581816623671839, "learning_rate": 1.7964452960555185e-05, "loss": 0.6138, "step": 1636 }, { "epoch": 0.23064459316660796, "grad_norm": 0.975691888541787, "learning_rate": 1.7961692464920585e-05, "loss": 0.9366, "step": 1637 }, { "epoch": 0.23078548784783373, "grad_norm": 1.1073713082951249, "learning_rate": 1.795893031113752e-05, "loss": 0.9549, "step": 1638 }, { "epoch": 0.23092638252905953, "grad_norm": 1.0237696494954984, "learning_rate": 1.7956166499781253e-05, "loss": 0.8839, "step": 1639 }, { "epoch": 0.2310672772102853, "grad_norm": 1.3829498097130777, "learning_rate": 1.795340103142739e-05, "loss": 0.5752, "step": 1640 }, { "epoch": 0.2312081718915111, "grad_norm": 1.2842975398681709, "learning_rate": 1.7950633906651894e-05, "loss": 0.6102, "step": 1641 }, { "epoch": 0.23134906657273688, "grad_norm": 1.3871561859757084, "learning_rate": 1.794786512603105e-05, "loss": 0.5216, "step": 1642 }, { "epoch": 0.23148996125396265, "grad_norm": 1.0785324841836879, "learning_rate": 1.794509469014151e-05, "loss": 0.915, "step": 1643 }, { "epoch": 0.23163085593518845, "grad_norm": 1.0103916320534334, "learning_rate": 1.794232259956025e-05, "loss": 0.8644, "step": 1644 }, { "epoch": 0.23177175061641422, "grad_norm": 1.103517920784546, "learning_rate": 1.7939548854864612e-05, "loss": 0.9285, "step": 1645 }, { "epoch": 0.23191264529764002, "grad_norm": 1.5270836422398586, "learning_rate": 1.7936773456632267e-05, "loss": 0.6613, "step": 1646 }, { "epoch": 0.2320535399788658, "grad_norm": 1.2973150102698388, "learning_rate": 1.7933996405441235e-05, "loss": 0.59, "step": 1647 }, { "epoch": 0.23219443466009157, "grad_norm": 1.2732081048897537, "learning_rate": 1.7931217701869886e-05, "loss": 0.5305, "step": 1648 }, { "epoch": 0.23233532934131737, "grad_norm": 1.3416158619908083, "learning_rate": 1.7928437346496924e-05, "loss": 0.6541, "step": 1649 }, { "epoch": 0.23247622402254314, "grad_norm": 1.0829569645495527, "learning_rate": 1.7925655339901403e-05, "loss": 0.9382, "step": 1650 }, { "epoch": 0.23261711870376894, "grad_norm": 1.0067470910383947, "learning_rate": 1.7922871682662722e-05, "loss": 0.8918, "step": 1651 }, { "epoch": 0.23275801338499472, "grad_norm": 1.5091064688867015, "learning_rate": 1.792008637536062e-05, "loss": 0.6123, "step": 1652 }, { "epoch": 0.2328989080662205, "grad_norm": 1.3804109236307558, "learning_rate": 1.791729941857518e-05, "loss": 0.5884, "step": 1653 }, { "epoch": 0.2330398027474463, "grad_norm": 1.0382075434767195, "learning_rate": 1.7914510812886837e-05, "loss": 0.9063, "step": 1654 }, { "epoch": 0.23318069742867206, "grad_norm": 0.9733947566716785, "learning_rate": 1.791172055887635e-05, "loss": 0.9492, "step": 1655 }, { "epoch": 0.23332159210989786, "grad_norm": 1.2270226743034687, "learning_rate": 1.7908928657124844e-05, "loss": 0.5733, "step": 1656 }, { "epoch": 0.23346248679112364, "grad_norm": 1.1359754569841973, "learning_rate": 1.7906135108213772e-05, "loss": 0.9671, "step": 1657 }, { "epoch": 0.2336033814723494, "grad_norm": 1.5803965102747632, "learning_rate": 1.790333991272494e-05, "loss": 0.5541, "step": 1658 }, { "epoch": 0.2337442761535752, "grad_norm": 1.1250918567777217, "learning_rate": 1.7900543071240482e-05, "loss": 0.9289, "step": 1659 }, { "epoch": 0.23388517083480098, "grad_norm": 1.3030712337034451, "learning_rate": 1.789774458434289e-05, "loss": 0.4962, "step": 1660 }, { "epoch": 0.23402606551602678, "grad_norm": 1.4777670492180848, "learning_rate": 1.7894944452614994e-05, "loss": 0.6134, "step": 1661 }, { "epoch": 0.23416696019725255, "grad_norm": 1.3318984513181826, "learning_rate": 1.7892142676639967e-05, "loss": 0.6427, "step": 1662 }, { "epoch": 0.23430785487847833, "grad_norm": 1.059956649106464, "learning_rate": 1.788933925700132e-05, "loss": 0.9094, "step": 1663 }, { "epoch": 0.23444874955970413, "grad_norm": 1.3742080814322184, "learning_rate": 1.788653419428291e-05, "loss": 0.6409, "step": 1664 }, { "epoch": 0.2345896442409299, "grad_norm": 1.0028693519354597, "learning_rate": 1.788372748906894e-05, "loss": 0.9296, "step": 1665 }, { "epoch": 0.2347305389221557, "grad_norm": 1.0617875510519623, "learning_rate": 1.7880919141943944e-05, "loss": 0.937, "step": 1666 }, { "epoch": 0.23487143360338147, "grad_norm": 1.4275926013973141, "learning_rate": 1.7878109153492808e-05, "loss": 0.4784, "step": 1667 }, { "epoch": 0.23501232828460725, "grad_norm": 1.6380480508457247, "learning_rate": 1.7875297524300757e-05, "loss": 0.652, "step": 1668 }, { "epoch": 0.23515322296583305, "grad_norm": 1.050858582243041, "learning_rate": 1.7872484254953354e-05, "loss": 0.9521, "step": 1669 }, { "epoch": 0.23529411764705882, "grad_norm": 1.0068738353509732, "learning_rate": 1.7869669346036514e-05, "loss": 0.9222, "step": 1670 }, { "epoch": 0.23543501232828462, "grad_norm": 1.1880151320316503, "learning_rate": 1.7866852798136478e-05, "loss": 0.9853, "step": 1671 }, { "epoch": 0.2355759070095104, "grad_norm": 1.2481173080607382, "learning_rate": 1.7864034611839843e-05, "loss": 0.4875, "step": 1672 }, { "epoch": 0.23571680169073617, "grad_norm": 0.9745840602354304, "learning_rate": 1.7861214787733542e-05, "loss": 0.9026, "step": 1673 }, { "epoch": 0.23585769637196197, "grad_norm": 1.1116407713180207, "learning_rate": 1.785839332640484e-05, "loss": 1.0153, "step": 1674 }, { "epoch": 0.23599859105318774, "grad_norm": 1.025337895023668, "learning_rate": 1.7855570228441354e-05, "loss": 0.9142, "step": 1675 }, { "epoch": 0.2361394857344135, "grad_norm": 0.9119558287645569, "learning_rate": 1.7852745494431043e-05, "loss": 0.8774, "step": 1676 }, { "epoch": 0.2362803804156393, "grad_norm": 0.877490881282966, "learning_rate": 1.7849919124962197e-05, "loss": 0.9067, "step": 1677 }, { "epoch": 0.23642127509686509, "grad_norm": 1.3375857425038296, "learning_rate": 1.7847091120623457e-05, "loss": 0.5557, "step": 1678 }, { "epoch": 0.23656216977809089, "grad_norm": 1.1140855639812293, "learning_rate": 1.78442614820038e-05, "loss": 0.9705, "step": 1679 }, { "epoch": 0.23670306445931666, "grad_norm": 1.2516767067043812, "learning_rate": 1.7841430209692537e-05, "loss": 0.5676, "step": 1680 }, { "epoch": 0.23684395914054243, "grad_norm": 1.10853594144277, "learning_rate": 1.7838597304279335e-05, "loss": 0.893, "step": 1681 }, { "epoch": 0.23698485382176823, "grad_norm": 1.004599535394311, "learning_rate": 1.783576276635418e-05, "loss": 0.9596, "step": 1682 }, { "epoch": 0.237125748502994, "grad_norm": 1.3927124248142566, "learning_rate": 1.7832926596507416e-05, "loss": 0.524, "step": 1683 }, { "epoch": 0.2372666431842198, "grad_norm": 1.2037337952397338, "learning_rate": 1.783008879532972e-05, "loss": 0.959, "step": 1684 }, { "epoch": 0.23740753786544558, "grad_norm": 1.3028363530087068, "learning_rate": 1.7827249363412105e-05, "loss": 0.4915, "step": 1685 }, { "epoch": 0.23754843254667135, "grad_norm": 1.0719320605805416, "learning_rate": 1.7824408301345934e-05, "loss": 0.9415, "step": 1686 }, { "epoch": 0.23768932722789715, "grad_norm": 0.9670566519438513, "learning_rate": 1.7821565609722898e-05, "loss": 0.9001, "step": 1687 }, { "epoch": 0.23783022190912292, "grad_norm": 1.2115528904086428, "learning_rate": 1.7818721289135035e-05, "loss": 0.5511, "step": 1688 }, { "epoch": 0.23797111659034872, "grad_norm": 1.0773184110383942, "learning_rate": 1.7815875340174718e-05, "loss": 0.8975, "step": 1689 }, { "epoch": 0.2381120112715745, "grad_norm": 1.0703323988722042, "learning_rate": 1.7813027763434666e-05, "loss": 0.9477, "step": 1690 }, { "epoch": 0.23825290595280027, "grad_norm": 1.0728474111733561, "learning_rate": 1.7810178559507926e-05, "loss": 0.8949, "step": 1691 }, { "epoch": 0.23839380063402607, "grad_norm": 1.0008338074761538, "learning_rate": 1.780732772898789e-05, "loss": 0.941, "step": 1692 }, { "epoch": 0.23853469531525184, "grad_norm": 1.0471221172873015, "learning_rate": 1.780447527246829e-05, "loss": 0.909, "step": 1693 }, { "epoch": 0.23867558999647764, "grad_norm": 1.053308457016988, "learning_rate": 1.7801621190543195e-05, "loss": 0.9403, "step": 1694 }, { "epoch": 0.23881648467770342, "grad_norm": 0.9377595150055602, "learning_rate": 1.7798765483807016e-05, "loss": 0.8806, "step": 1695 }, { "epoch": 0.2389573793589292, "grad_norm": 1.0414409830584281, "learning_rate": 1.7795908152854497e-05, "loss": 0.9122, "step": 1696 }, { "epoch": 0.239098274040155, "grad_norm": 1.0300358298008756, "learning_rate": 1.779304919828072e-05, "loss": 0.9416, "step": 1697 }, { "epoch": 0.23923916872138076, "grad_norm": 1.1492379729370674, "learning_rate": 1.779018862068111e-05, "loss": 0.8983, "step": 1698 }, { "epoch": 0.23938006340260656, "grad_norm": 0.9663907004629525, "learning_rate": 1.7787326420651428e-05, "loss": 0.9398, "step": 1699 }, { "epoch": 0.23952095808383234, "grad_norm": 1.412264358084756, "learning_rate": 1.7784462598787772e-05, "loss": 0.5501, "step": 1700 }, { "epoch": 0.2396618527650581, "grad_norm": 0.989768152777451, "learning_rate": 1.7781597155686575e-05, "loss": 0.9585, "step": 1701 }, { "epoch": 0.2398027474462839, "grad_norm": 1.755515182417393, "learning_rate": 1.7778730091944615e-05, "loss": 0.6418, "step": 1702 }, { "epoch": 0.23994364212750968, "grad_norm": 1.057941702746969, "learning_rate": 1.7775861408158998e-05, "loss": 0.9448, "step": 1703 }, { "epoch": 0.24008453680873548, "grad_norm": 1.0342557046012941, "learning_rate": 1.7772991104927178e-05, "loss": 0.8808, "step": 1704 }, { "epoch": 0.24022543148996126, "grad_norm": 1.0940378874635401, "learning_rate": 1.7770119182846942e-05, "loss": 0.9837, "step": 1705 }, { "epoch": 0.24036632617118703, "grad_norm": 1.5880512355339165, "learning_rate": 1.7767245642516413e-05, "loss": 0.7088, "step": 1706 }, { "epoch": 0.24050722085241283, "grad_norm": 1.1440611665425502, "learning_rate": 1.7764370484534047e-05, "loss": 0.9995, "step": 1707 }, { "epoch": 0.2406481155336386, "grad_norm": 1.0589571752194975, "learning_rate": 1.776149370949864e-05, "loss": 0.9318, "step": 1708 }, { "epoch": 0.2407890102148644, "grad_norm": 0.9437643914826176, "learning_rate": 1.7758615318009332e-05, "loss": 0.9041, "step": 1709 }, { "epoch": 0.24092990489609017, "grad_norm": 0.9455735322401023, "learning_rate": 1.7755735310665592e-05, "loss": 0.9427, "step": 1710 }, { "epoch": 0.24107079957731595, "grad_norm": 1.0733008027539463, "learning_rate": 1.7752853688067227e-05, "loss": 0.9357, "step": 1711 }, { "epoch": 0.24121169425854175, "grad_norm": 1.3608683634272782, "learning_rate": 1.7749970450814376e-05, "loss": 0.6698, "step": 1712 }, { "epoch": 0.24135258893976752, "grad_norm": 1.1565707380344732, "learning_rate": 1.7747085599507525e-05, "loss": 0.9067, "step": 1713 }, { "epoch": 0.24149348362099332, "grad_norm": 1.4874533866386792, "learning_rate": 1.7744199134747488e-05, "loss": 0.5374, "step": 1714 }, { "epoch": 0.2416343783022191, "grad_norm": 0.9777279185148695, "learning_rate": 1.7741311057135414e-05, "loss": 0.919, "step": 1715 }, { "epoch": 0.24177527298344487, "grad_norm": 0.9795390383859958, "learning_rate": 1.7738421367272795e-05, "loss": 0.9374, "step": 1716 }, { "epoch": 0.24191616766467067, "grad_norm": 1.1242176057393063, "learning_rate": 1.7735530065761448e-05, "loss": 0.9557, "step": 1717 }, { "epoch": 0.24205706234589644, "grad_norm": 1.4851010340798452, "learning_rate": 1.7732637153203542e-05, "loss": 0.4936, "step": 1718 }, { "epoch": 0.2421979570271222, "grad_norm": 1.09310506381005, "learning_rate": 1.7729742630201565e-05, "loss": 0.9334, "step": 1719 }, { "epoch": 0.242338851708348, "grad_norm": 1.4661668471511649, "learning_rate": 1.772684649735835e-05, "loss": 0.6019, "step": 1720 }, { "epoch": 0.24247974638957379, "grad_norm": 0.9523442021315837, "learning_rate": 1.7723948755277063e-05, "loss": 0.9223, "step": 1721 }, { "epoch": 0.2426206410707996, "grad_norm": 1.0194604772430569, "learning_rate": 1.77210494045612e-05, "loss": 0.929, "step": 1722 }, { "epoch": 0.24276153575202536, "grad_norm": 1.3938284501204616, "learning_rate": 1.77181484458146e-05, "loss": 0.4827, "step": 1723 }, { "epoch": 0.24290243043325113, "grad_norm": 1.0856301705963878, "learning_rate": 1.7715245879641435e-05, "loss": 0.9656, "step": 1724 }, { "epoch": 0.24304332511447693, "grad_norm": 1.6020369257672387, "learning_rate": 1.771234170664621e-05, "loss": 0.4836, "step": 1725 }, { "epoch": 0.2431842197957027, "grad_norm": 1.0494473260673856, "learning_rate": 1.770943592743376e-05, "loss": 0.9078, "step": 1726 }, { "epoch": 0.2433251144769285, "grad_norm": 0.9906621228640359, "learning_rate": 1.7706528542609266e-05, "loss": 1.0081, "step": 1727 }, { "epoch": 0.24346600915815428, "grad_norm": 1.233989399307612, "learning_rate": 1.7703619552778234e-05, "loss": 0.9311, "step": 1728 }, { "epoch": 0.24360690383938005, "grad_norm": 1.047410075641301, "learning_rate": 1.7700708958546505e-05, "loss": 0.8997, "step": 1729 }, { "epoch": 0.24374779852060585, "grad_norm": 1.0515127925352976, "learning_rate": 1.7697796760520262e-05, "loss": 0.965, "step": 1730 }, { "epoch": 0.24388869320183162, "grad_norm": 1.0498112800114927, "learning_rate": 1.7694882959306012e-05, "loss": 0.8707, "step": 1731 }, { "epoch": 0.24402958788305742, "grad_norm": 0.9928811020249229, "learning_rate": 1.76919675555106e-05, "loss": 0.9604, "step": 1732 }, { "epoch": 0.2441704825642832, "grad_norm": 1.3718140225458118, "learning_rate": 1.768905054974121e-05, "loss": 0.6074, "step": 1733 }, { "epoch": 0.24431137724550897, "grad_norm": 0.9460460955420026, "learning_rate": 1.7686131942605346e-05, "loss": 0.8593, "step": 1734 }, { "epoch": 0.24445227192673477, "grad_norm": 1.1731560716465903, "learning_rate": 1.7683211734710863e-05, "loss": 0.5192, "step": 1735 }, { "epoch": 0.24459316660796054, "grad_norm": 1.3749826860309025, "learning_rate": 1.7680289926665937e-05, "loss": 0.6652, "step": 1736 }, { "epoch": 0.24473406128918634, "grad_norm": 0.9295637367959642, "learning_rate": 1.7677366519079074e-05, "loss": 0.9154, "step": 1737 }, { "epoch": 0.24487495597041212, "grad_norm": 1.2959544542452108, "learning_rate": 1.7674441512559135e-05, "loss": 0.6359, "step": 1738 }, { "epoch": 0.2450158506516379, "grad_norm": 1.031499782256601, "learning_rate": 1.7671514907715282e-05, "loss": 0.8995, "step": 1739 }, { "epoch": 0.2451567453328637, "grad_norm": 0.9879704759367753, "learning_rate": 1.766858670515704e-05, "loss": 0.8808, "step": 1740 }, { "epoch": 0.24529764001408946, "grad_norm": 1.3794976042434917, "learning_rate": 1.7665656905494245e-05, "loss": 0.5171, "step": 1741 }, { "epoch": 0.24543853469531526, "grad_norm": 1.0581538132292996, "learning_rate": 1.766272550933708e-05, "loss": 0.893, "step": 1742 }, { "epoch": 0.24557942937654104, "grad_norm": 1.0375330894666934, "learning_rate": 1.7659792517296053e-05, "loss": 0.956, "step": 1743 }, { "epoch": 0.2457203240577668, "grad_norm": 0.902653775000328, "learning_rate": 1.7656857929982002e-05, "loss": 0.8992, "step": 1744 }, { "epoch": 0.2458612187389926, "grad_norm": 1.3289115567814773, "learning_rate": 1.7653921748006106e-05, "loss": 0.5242, "step": 1745 }, { "epoch": 0.24600211342021838, "grad_norm": 0.9777306372391762, "learning_rate": 1.7650983971979873e-05, "loss": 0.9449, "step": 1746 }, { "epoch": 0.24614300810144418, "grad_norm": 1.1046736447760102, "learning_rate": 1.7648044602515133e-05, "loss": 0.5442, "step": 1747 }, { "epoch": 0.24628390278266996, "grad_norm": 0.9369193523882737, "learning_rate": 1.7645103640224063e-05, "loss": 0.9667, "step": 1748 }, { "epoch": 0.24642479746389573, "grad_norm": 1.0590274933055701, "learning_rate": 1.7642161085719164e-05, "loss": 0.9183, "step": 1749 }, { "epoch": 0.24656569214512153, "grad_norm": 1.3298175440751332, "learning_rate": 1.7639216939613265e-05, "loss": 0.4848, "step": 1750 }, { "epoch": 0.2467065868263473, "grad_norm": 1.5050599221760517, "learning_rate": 1.763627120251954e-05, "loss": 0.6406, "step": 1751 }, { "epoch": 0.2468474815075731, "grad_norm": 0.9550625334815187, "learning_rate": 1.7633323875051478e-05, "loss": 0.8618, "step": 1752 }, { "epoch": 0.24698837618879887, "grad_norm": 1.2966626296467818, "learning_rate": 1.763037495782291e-05, "loss": 0.599, "step": 1753 }, { "epoch": 0.24712927087002465, "grad_norm": 1.534040973794668, "learning_rate": 1.762742445144799e-05, "loss": 0.5572, "step": 1754 }, { "epoch": 0.24727016555125045, "grad_norm": 1.1009124629311686, "learning_rate": 1.7624472356541216e-05, "loss": 0.9377, "step": 1755 }, { "epoch": 0.24741106023247622, "grad_norm": 0.9419185266064708, "learning_rate": 1.7621518673717402e-05, "loss": 0.8748, "step": 1756 }, { "epoch": 0.24755195491370202, "grad_norm": 1.7040381028289995, "learning_rate": 1.76185634035917e-05, "loss": 0.7159, "step": 1757 }, { "epoch": 0.2476928495949278, "grad_norm": 1.0026376249443272, "learning_rate": 1.7615606546779596e-05, "loss": 0.9164, "step": 1758 }, { "epoch": 0.24783374427615357, "grad_norm": 1.0338710388197196, "learning_rate": 1.7612648103896895e-05, "loss": 0.8882, "step": 1759 }, { "epoch": 0.24797463895737937, "grad_norm": 1.2116862998007991, "learning_rate": 1.7609688075559746e-05, "loss": 0.492, "step": 1760 }, { "epoch": 0.24811553363860514, "grad_norm": 1.0104333011178421, "learning_rate": 1.7606726462384616e-05, "loss": 0.8948, "step": 1761 }, { "epoch": 0.2482564283198309, "grad_norm": 1.0777100130532142, "learning_rate": 1.7603763264988313e-05, "loss": 0.915, "step": 1762 }, { "epoch": 0.2483973230010567, "grad_norm": 1.460333283869515, "learning_rate": 1.760079848398797e-05, "loss": 0.6944, "step": 1763 }, { "epoch": 0.2485382176822825, "grad_norm": 1.1209072941065779, "learning_rate": 1.7597832120001045e-05, "loss": 0.924, "step": 1764 }, { "epoch": 0.2486791123635083, "grad_norm": 1.068308712338838, "learning_rate": 1.759486417364533e-05, "loss": 0.9081, "step": 1765 }, { "epoch": 0.24882000704473406, "grad_norm": 0.9914376972840575, "learning_rate": 1.759189464553895e-05, "loss": 0.9238, "step": 1766 }, { "epoch": 0.24896090172595983, "grad_norm": 0.9577948351739042, "learning_rate": 1.758892353630036e-05, "loss": 0.8647, "step": 1767 }, { "epoch": 0.24910179640718563, "grad_norm": 1.2773893035614357, "learning_rate": 1.7585950846548333e-05, "loss": 0.5415, "step": 1768 }, { "epoch": 0.2492426910884114, "grad_norm": 1.1568610885039454, "learning_rate": 1.7582976576901982e-05, "loss": 0.9246, "step": 1769 }, { "epoch": 0.2493835857696372, "grad_norm": 1.2160047494734396, "learning_rate": 1.7580000727980743e-05, "loss": 1.0061, "step": 1770 }, { "epoch": 0.24952448045086298, "grad_norm": 1.0228555449398256, "learning_rate": 1.757702330040439e-05, "loss": 0.9482, "step": 1771 }, { "epoch": 0.24966537513208875, "grad_norm": 1.2922692434661556, "learning_rate": 1.7574044294793014e-05, "loss": 0.4701, "step": 1772 }, { "epoch": 0.24980626981331455, "grad_norm": 1.4215819109212464, "learning_rate": 1.757106371176704e-05, "loss": 0.7047, "step": 1773 }, { "epoch": 0.24994716449454032, "grad_norm": 1.1650613929335247, "learning_rate": 1.7568081551947228e-05, "loss": 0.9584, "step": 1774 }, { "epoch": 0.2500880591757661, "grad_norm": 1.2391624148772376, "learning_rate": 1.7565097815954652e-05, "loss": 0.9699, "step": 1775 }, { "epoch": 0.2502289538569919, "grad_norm": 0.9529123653494924, "learning_rate": 1.7562112504410724e-05, "loss": 0.8943, "step": 1776 }, { "epoch": 0.2503698485382177, "grad_norm": 1.3998824911139442, "learning_rate": 1.7559125617937187e-05, "loss": 0.537, "step": 1777 }, { "epoch": 0.25051074321944344, "grad_norm": 1.010175395229358, "learning_rate": 1.7556137157156105e-05, "loss": 0.9204, "step": 1778 }, { "epoch": 0.25065163790066924, "grad_norm": 1.1920919530922807, "learning_rate": 1.7553147122689868e-05, "loss": 0.9372, "step": 1779 }, { "epoch": 0.25079253258189504, "grad_norm": 1.5062113913739819, "learning_rate": 1.7550155515161204e-05, "loss": 0.5089, "step": 1780 }, { "epoch": 0.2509334272631208, "grad_norm": 1.5552678836456326, "learning_rate": 1.7547162335193156e-05, "loss": 0.6766, "step": 1781 }, { "epoch": 0.2510743219443466, "grad_norm": 1.4457379628976397, "learning_rate": 1.7544167583409108e-05, "loss": 0.4637, "step": 1782 }, { "epoch": 0.2512152166255724, "grad_norm": 1.0224207090194386, "learning_rate": 1.754117126043276e-05, "loss": 0.9383, "step": 1783 }, { "epoch": 0.2513561113067982, "grad_norm": 0.9348844668492038, "learning_rate": 1.753817336688815e-05, "loss": 0.9293, "step": 1784 }, { "epoch": 0.25149700598802394, "grad_norm": 1.0862047180617957, "learning_rate": 1.7535173903399626e-05, "loss": 0.9294, "step": 1785 }, { "epoch": 0.25163790066924974, "grad_norm": 0.9547522328926152, "learning_rate": 1.7532172870591883e-05, "loss": 0.9069, "step": 1786 }, { "epoch": 0.25177879535047554, "grad_norm": 0.9081307712101607, "learning_rate": 1.752917026908993e-05, "loss": 0.8745, "step": 1787 }, { "epoch": 0.2519196900317013, "grad_norm": 1.0532234725041796, "learning_rate": 1.7526166099519106e-05, "loss": 0.9464, "step": 1788 }, { "epoch": 0.2520605847129271, "grad_norm": 1.13231711085388, "learning_rate": 1.752316036250508e-05, "loss": 0.9387, "step": 1789 }, { "epoch": 0.2522014793941529, "grad_norm": 1.2101610593828747, "learning_rate": 1.752015305867384e-05, "loss": 0.9736, "step": 1790 }, { "epoch": 0.25234237407537863, "grad_norm": 0.9578233266473727, "learning_rate": 1.7517144188651706e-05, "loss": 0.9206, "step": 1791 }, { "epoch": 0.25248326875660443, "grad_norm": 0.9769734174265248, "learning_rate": 1.7514133753065327e-05, "loss": 0.8603, "step": 1792 }, { "epoch": 0.25262416343783023, "grad_norm": 0.9843605382266514, "learning_rate": 1.7511121752541665e-05, "loss": 0.9524, "step": 1793 }, { "epoch": 0.25276505811905603, "grad_norm": 1.077951227189634, "learning_rate": 1.7508108187708025e-05, "loss": 0.8974, "step": 1794 }, { "epoch": 0.2529059528002818, "grad_norm": 0.990751179097368, "learning_rate": 1.750509305919202e-05, "loss": 0.8877, "step": 1795 }, { "epoch": 0.2530468474815076, "grad_norm": 1.0982378232711805, "learning_rate": 1.7502076367621614e-05, "loss": 0.9297, "step": 1796 }, { "epoch": 0.2531877421627334, "grad_norm": 1.0087477285147024, "learning_rate": 1.7499058113625067e-05, "loss": 0.8773, "step": 1797 }, { "epoch": 0.2533286368439591, "grad_norm": 1.132111606011391, "learning_rate": 1.7496038297830984e-05, "loss": 0.973, "step": 1798 }, { "epoch": 0.2534695315251849, "grad_norm": 1.2974545812240774, "learning_rate": 1.7493016920868288e-05, "loss": 0.5472, "step": 1799 }, { "epoch": 0.2536104262064107, "grad_norm": 1.4706433640718226, "learning_rate": 1.748999398336623e-05, "loss": 0.6404, "step": 1800 }, { "epoch": 0.25375132088763647, "grad_norm": 1.2477763158606676, "learning_rate": 1.7486969485954378e-05, "loss": 0.9424, "step": 1801 }, { "epoch": 0.25389221556886227, "grad_norm": 0.9647020895615208, "learning_rate": 1.7483943429262638e-05, "loss": 0.8917, "step": 1802 }, { "epoch": 0.25403311025008807, "grad_norm": 1.0276751860403388, "learning_rate": 1.7480915813921235e-05, "loss": 0.932, "step": 1803 }, { "epoch": 0.25417400493131387, "grad_norm": 1.058729654940189, "learning_rate": 1.7477886640560717e-05, "loss": 0.9257, "step": 1804 }, { "epoch": 0.2543148996125396, "grad_norm": 1.0078483337665114, "learning_rate": 1.7474855909811954e-05, "loss": 0.9146, "step": 1805 }, { "epoch": 0.2544557942937654, "grad_norm": 1.0884998325544002, "learning_rate": 1.7471823622306145e-05, "loss": 0.9359, "step": 1806 }, { "epoch": 0.2545966889749912, "grad_norm": 1.1157455594797039, "learning_rate": 1.7468789778674815e-05, "loss": 0.9447, "step": 1807 }, { "epoch": 0.25473758365621696, "grad_norm": 1.059142428653402, "learning_rate": 1.7465754379549808e-05, "loss": 0.9237, "step": 1808 }, { "epoch": 0.25487847833744276, "grad_norm": 1.0774131158464961, "learning_rate": 1.746271742556329e-05, "loss": 0.8882, "step": 1809 }, { "epoch": 0.25501937301866856, "grad_norm": 1.2196515047017151, "learning_rate": 1.745967891734776e-05, "loss": 0.8872, "step": 1810 }, { "epoch": 0.2551602676998943, "grad_norm": 1.0992486405921567, "learning_rate": 1.7456638855536032e-05, "loss": 0.9163, "step": 1811 }, { "epoch": 0.2553011623811201, "grad_norm": 1.4730944291329802, "learning_rate": 1.7453597240761248e-05, "loss": 0.5154, "step": 1812 }, { "epoch": 0.2554420570623459, "grad_norm": 1.0091395481847023, "learning_rate": 1.7450554073656873e-05, "loss": 0.8818, "step": 1813 }, { "epoch": 0.2555829517435717, "grad_norm": 1.1460632775618356, "learning_rate": 1.7447509354856698e-05, "loss": 0.8963, "step": 1814 }, { "epoch": 0.25572384642479745, "grad_norm": 1.358797280856087, "learning_rate": 1.744446308499483e-05, "loss": 0.5197, "step": 1815 }, { "epoch": 0.25586474110602325, "grad_norm": 1.0957014047851434, "learning_rate": 1.74414152647057e-05, "loss": 0.9285, "step": 1816 }, { "epoch": 0.25600563578724905, "grad_norm": 1.0475844654459396, "learning_rate": 1.743836589462407e-05, "loss": 0.8248, "step": 1817 }, { "epoch": 0.2561465304684748, "grad_norm": 1.0868044152387608, "learning_rate": 1.743531497538502e-05, "loss": 0.9057, "step": 1818 }, { "epoch": 0.2562874251497006, "grad_norm": 0.9946707761204908, "learning_rate": 1.7432262507623944e-05, "loss": 0.8657, "step": 1819 }, { "epoch": 0.2564283198309264, "grad_norm": 1.0020881010318257, "learning_rate": 1.7429208491976578e-05, "loss": 0.9457, "step": 1820 }, { "epoch": 0.25656921451215214, "grad_norm": 1.0283531953982534, "learning_rate": 1.7426152929078962e-05, "loss": 0.8613, "step": 1821 }, { "epoch": 0.25671010919337794, "grad_norm": 1.4491010993476647, "learning_rate": 1.742309581956747e-05, "loss": 0.5582, "step": 1822 }, { "epoch": 0.25685100387460374, "grad_norm": 1.1064050590115673, "learning_rate": 1.7420037164078787e-05, "loss": 0.9456, "step": 1823 }, { "epoch": 0.2569918985558295, "grad_norm": 0.947098655446784, "learning_rate": 1.741697696324993e-05, "loss": 0.9745, "step": 1824 }, { "epoch": 0.2571327932370553, "grad_norm": 1.6664487469272473, "learning_rate": 1.7413915217718236e-05, "loss": 0.5374, "step": 1825 }, { "epoch": 0.2572736879182811, "grad_norm": 1.390973395197126, "learning_rate": 1.741085192812136e-05, "loss": 0.6417, "step": 1826 }, { "epoch": 0.2574145825995069, "grad_norm": 1.2869832852211873, "learning_rate": 1.740778709509728e-05, "loss": 0.4843, "step": 1827 }, { "epoch": 0.25755547728073264, "grad_norm": 1.2297782356526703, "learning_rate": 1.74047207192843e-05, "loss": 0.4814, "step": 1828 }, { "epoch": 0.25769637196195844, "grad_norm": 1.0764174068606316, "learning_rate": 1.740165280132103e-05, "loss": 0.9612, "step": 1829 }, { "epoch": 0.25783726664318424, "grad_norm": 0.9527659653937649, "learning_rate": 1.7398583341846428e-05, "loss": 0.9239, "step": 1830 }, { "epoch": 0.25797816132441, "grad_norm": 1.0316561758529432, "learning_rate": 1.7395512341499746e-05, "loss": 0.8891, "step": 1831 }, { "epoch": 0.2581190560056358, "grad_norm": 1.0229154709883765, "learning_rate": 1.739243980092058e-05, "loss": 0.9632, "step": 1832 }, { "epoch": 0.2582599506868616, "grad_norm": 1.439101635214904, "learning_rate": 1.738936572074882e-05, "loss": 0.6903, "step": 1833 }, { "epoch": 0.25840084536808733, "grad_norm": 1.2928290526307962, "learning_rate": 1.73862901016247e-05, "loss": 0.5163, "step": 1834 }, { "epoch": 0.25854174004931313, "grad_norm": 0.9160547636459259, "learning_rate": 1.7383212944188767e-05, "loss": 0.8961, "step": 1835 }, { "epoch": 0.25868263473053893, "grad_norm": 1.0456942718356899, "learning_rate": 1.738013424908189e-05, "loss": 0.9234, "step": 1836 }, { "epoch": 0.25882352941176473, "grad_norm": 1.1211480156909746, "learning_rate": 1.7377054016945247e-05, "loss": 0.923, "step": 1837 }, { "epoch": 0.2589644240929905, "grad_norm": 1.0127649313064488, "learning_rate": 1.7373972248420354e-05, "loss": 0.9016, "step": 1838 }, { "epoch": 0.2591053187742163, "grad_norm": 1.2769790333635556, "learning_rate": 1.7370888944149033e-05, "loss": 0.5216, "step": 1839 }, { "epoch": 0.2592462134554421, "grad_norm": 1.0757993182068746, "learning_rate": 1.7367804104773433e-05, "loss": 0.9153, "step": 1840 }, { "epoch": 0.2593871081366678, "grad_norm": 1.3774456657430973, "learning_rate": 1.7364717730936026e-05, "loss": 0.5803, "step": 1841 }, { "epoch": 0.2595280028178936, "grad_norm": 0.9856672323147259, "learning_rate": 1.7361629823279585e-05, "loss": 0.9683, "step": 1842 }, { "epoch": 0.2596688974991194, "grad_norm": 1.1300278957939556, "learning_rate": 1.7358540382447223e-05, "loss": 0.4749, "step": 1843 }, { "epoch": 0.25980979218034517, "grad_norm": 1.2817436168391738, "learning_rate": 1.735544940908237e-05, "loss": 0.5236, "step": 1844 }, { "epoch": 0.25995068686157097, "grad_norm": 1.1743827454325686, "learning_rate": 1.7352356903828763e-05, "loss": 0.544, "step": 1845 }, { "epoch": 0.26009158154279677, "grad_norm": 2.826111372694886, "learning_rate": 1.734926286733047e-05, "loss": 0.6483, "step": 1846 }, { "epoch": 0.26023247622402257, "grad_norm": 1.0556842553050427, "learning_rate": 1.7346167300231865e-05, "loss": 0.9042, "step": 1847 }, { "epoch": 0.2603733709052483, "grad_norm": 1.1002955948755426, "learning_rate": 1.7343070203177655e-05, "loss": 0.9312, "step": 1848 }, { "epoch": 0.2605142655864741, "grad_norm": 1.3901702088008199, "learning_rate": 1.7339971576812863e-05, "loss": 0.7029, "step": 1849 }, { "epoch": 0.2606551602676999, "grad_norm": 0.9783626008393785, "learning_rate": 1.7336871421782818e-05, "loss": 0.9047, "step": 1850 }, { "epoch": 0.26079605494892566, "grad_norm": 1.4352341355974725, "learning_rate": 1.7333769738733187e-05, "loss": 0.5354, "step": 1851 }, { "epoch": 0.26093694963015146, "grad_norm": 1.008854309409243, "learning_rate": 1.7330666528309933e-05, "loss": 0.931, "step": 1852 }, { "epoch": 0.26107784431137726, "grad_norm": 1.2780762619108295, "learning_rate": 1.7327561791159357e-05, "loss": 0.5967, "step": 1853 }, { "epoch": 0.261218738992603, "grad_norm": 1.0480347916179344, "learning_rate": 1.7324455527928067e-05, "loss": 0.9326, "step": 1854 }, { "epoch": 0.2613596336738288, "grad_norm": 1.2155566554900594, "learning_rate": 1.7321347739262992e-05, "loss": 0.4702, "step": 1855 }, { "epoch": 0.2615005283550546, "grad_norm": 1.1301539081548846, "learning_rate": 1.7318238425811378e-05, "loss": 0.9374, "step": 1856 }, { "epoch": 0.2616414230362804, "grad_norm": 1.017011262658945, "learning_rate": 1.7315127588220787e-05, "loss": 0.9611, "step": 1857 }, { "epoch": 0.26178231771750615, "grad_norm": 1.014829139260643, "learning_rate": 1.7312015227139102e-05, "loss": 0.9328, "step": 1858 }, { "epoch": 0.26192321239873195, "grad_norm": 1.3819160618556259, "learning_rate": 1.730890134321452e-05, "loss": 0.5804, "step": 1859 }, { "epoch": 0.26206410707995775, "grad_norm": 0.9591769782351868, "learning_rate": 1.7305785937095557e-05, "loss": 0.8563, "step": 1860 }, { "epoch": 0.2622050017611835, "grad_norm": 1.079712762354452, "learning_rate": 1.730266900943105e-05, "loss": 0.9, "step": 1861 }, { "epoch": 0.2623458964424093, "grad_norm": 1.0738792940617699, "learning_rate": 1.7299550560870143e-05, "loss": 0.814, "step": 1862 }, { "epoch": 0.2624867911236351, "grad_norm": 1.0428837929901293, "learning_rate": 1.72964305920623e-05, "loss": 0.9478, "step": 1863 }, { "epoch": 0.26262768580486084, "grad_norm": 1.0688603089559088, "learning_rate": 1.7293309103657314e-05, "loss": 0.9681, "step": 1864 }, { "epoch": 0.26276858048608664, "grad_norm": 1.3924226937362743, "learning_rate": 1.7290186096305275e-05, "loss": 0.4633, "step": 1865 }, { "epoch": 0.26290947516731245, "grad_norm": 0.9703482593646364, "learning_rate": 1.7287061570656606e-05, "loss": 0.9474, "step": 1866 }, { "epoch": 0.2630503698485382, "grad_norm": 1.0985932157843519, "learning_rate": 1.7283935527362027e-05, "loss": 0.9021, "step": 1867 }, { "epoch": 0.263191264529764, "grad_norm": 0.9877320132683205, "learning_rate": 1.72808079670726e-05, "loss": 0.9208, "step": 1868 }, { "epoch": 0.2633321592109898, "grad_norm": 1.6722225434091331, "learning_rate": 1.727767889043968e-05, "loss": 0.6023, "step": 1869 }, { "epoch": 0.2634730538922156, "grad_norm": 1.0785518643760623, "learning_rate": 1.7274548298114953e-05, "loss": 0.9017, "step": 1870 }, { "epoch": 0.26361394857344134, "grad_norm": 1.0646310758625581, "learning_rate": 1.7271416190750406e-05, "loss": 0.885, "step": 1871 }, { "epoch": 0.26375484325466714, "grad_norm": 1.0456442035482982, "learning_rate": 1.726828256899836e-05, "loss": 1.0042, "step": 1872 }, { "epoch": 0.26389573793589294, "grad_norm": 1.5095947108498284, "learning_rate": 1.7265147433511432e-05, "loss": 0.6668, "step": 1873 }, { "epoch": 0.2640366326171187, "grad_norm": 1.5809123708603585, "learning_rate": 1.7262010784942572e-05, "loss": 0.5362, "step": 1874 }, { "epoch": 0.2641775272983445, "grad_norm": 1.351352434698791, "learning_rate": 1.725887262394503e-05, "loss": 0.5015, "step": 1875 }, { "epoch": 0.2643184219795703, "grad_norm": 1.1482054325655955, "learning_rate": 1.725573295117238e-05, "loss": 0.9227, "step": 1876 }, { "epoch": 0.26445931666079603, "grad_norm": 1.15566911203855, "learning_rate": 1.725259176727851e-05, "loss": 0.883, "step": 1877 }, { "epoch": 0.26460021134202183, "grad_norm": 0.9152713155433921, "learning_rate": 1.7249449072917623e-05, "loss": 0.8939, "step": 1878 }, { "epoch": 0.26474110602324763, "grad_norm": 1.7819049517487235, "learning_rate": 1.7246304868744227e-05, "loss": 0.5382, "step": 1879 }, { "epoch": 0.26488200070447343, "grad_norm": 0.9300853900674713, "learning_rate": 1.7243159155413157e-05, "loss": 0.8457, "step": 1880 }, { "epoch": 0.2650228953856992, "grad_norm": 1.4607004555259582, "learning_rate": 1.724001193357956e-05, "loss": 0.5575, "step": 1881 }, { "epoch": 0.265163790066925, "grad_norm": 1.0164874650839864, "learning_rate": 1.723686320389889e-05, "loss": 0.8974, "step": 1882 }, { "epoch": 0.2653046847481508, "grad_norm": 1.351727393610204, "learning_rate": 1.7233712967026925e-05, "loss": 0.6321, "step": 1883 }, { "epoch": 0.2654455794293765, "grad_norm": 1.0516566388480035, "learning_rate": 1.7230561223619745e-05, "loss": 0.9394, "step": 1884 }, { "epoch": 0.2655864741106023, "grad_norm": 0.9389121934592803, "learning_rate": 1.722740797433376e-05, "loss": 0.9245, "step": 1885 }, { "epoch": 0.2657273687918281, "grad_norm": 1.322072878435257, "learning_rate": 1.7224253219825672e-05, "loss": 0.5942, "step": 1886 }, { "epoch": 0.26586826347305387, "grad_norm": 0.9716328075603942, "learning_rate": 1.722109696075252e-05, "loss": 0.9559, "step": 1887 }, { "epoch": 0.26600915815427967, "grad_norm": 1.0169658718943606, "learning_rate": 1.721793919777164e-05, "loss": 0.8533, "step": 1888 }, { "epoch": 0.26615005283550547, "grad_norm": 1.1203854812893073, "learning_rate": 1.7214779931540685e-05, "loss": 0.921, "step": 1889 }, { "epoch": 0.26629094751673127, "grad_norm": 1.106129059341112, "learning_rate": 1.721161916271762e-05, "loss": 0.9293, "step": 1890 }, { "epoch": 0.266431842197957, "grad_norm": 1.0197176053366874, "learning_rate": 1.7208456891960733e-05, "loss": 0.9465, "step": 1891 }, { "epoch": 0.2665727368791828, "grad_norm": 0.9257989788441061, "learning_rate": 1.720529311992861e-05, "loss": 0.8643, "step": 1892 }, { "epoch": 0.2667136315604086, "grad_norm": 1.366223970952254, "learning_rate": 1.720212784728016e-05, "loss": 0.5969, "step": 1893 }, { "epoch": 0.26685452624163436, "grad_norm": 1.0865946519774434, "learning_rate": 1.71989610746746e-05, "loss": 0.9302, "step": 1894 }, { "epoch": 0.26699542092286016, "grad_norm": 1.036194028454332, "learning_rate": 1.7195792802771458e-05, "loss": 0.9079, "step": 1895 }, { "epoch": 0.26713631560408596, "grad_norm": 1.0570395392294263, "learning_rate": 1.719262303223058e-05, "loss": 0.9101, "step": 1896 }, { "epoch": 0.2672772102853117, "grad_norm": 1.3297261112200405, "learning_rate": 1.718945176371212e-05, "loss": 0.6325, "step": 1897 }, { "epoch": 0.2674181049665375, "grad_norm": 0.9563804899262731, "learning_rate": 1.718627899787655e-05, "loss": 0.9191, "step": 1898 }, { "epoch": 0.2675589996477633, "grad_norm": 1.1332874208395411, "learning_rate": 1.718310473538464e-05, "loss": 0.933, "step": 1899 }, { "epoch": 0.2676998943289891, "grad_norm": 1.3140435589210704, "learning_rate": 1.7179928976897485e-05, "loss": 0.5378, "step": 1900 }, { "epoch": 0.26784078901021485, "grad_norm": 1.2622853384860777, "learning_rate": 1.7176751723076483e-05, "loss": 0.4233, "step": 1901 }, { "epoch": 0.26798168369144065, "grad_norm": 1.1145555954098316, "learning_rate": 1.7173572974583354e-05, "loss": 0.9233, "step": 1902 }, { "epoch": 0.26812257837266645, "grad_norm": 1.0335073771436727, "learning_rate": 1.717039273208012e-05, "loss": 0.9299, "step": 1903 }, { "epoch": 0.2682634730538922, "grad_norm": 1.0675209747463377, "learning_rate": 1.7167210996229115e-05, "loss": 0.9132, "step": 1904 }, { "epoch": 0.268404367735118, "grad_norm": 1.0673302665638633, "learning_rate": 1.7164027767692985e-05, "loss": 0.9411, "step": 1905 }, { "epoch": 0.2685452624163438, "grad_norm": 1.3946866866911827, "learning_rate": 1.716084304713469e-05, "loss": 0.5233, "step": 1906 }, { "epoch": 0.26868615709756954, "grad_norm": 1.2168827500749368, "learning_rate": 1.7157656835217498e-05, "loss": 0.4944, "step": 1907 }, { "epoch": 0.26882705177879535, "grad_norm": 1.0620758977566025, "learning_rate": 1.7154469132604988e-05, "loss": 0.8889, "step": 1908 }, { "epoch": 0.26896794646002115, "grad_norm": 1.0025457751813243, "learning_rate": 1.7151279939961053e-05, "loss": 0.8554, "step": 1909 }, { "epoch": 0.2691088411412469, "grad_norm": 0.9661137029445026, "learning_rate": 1.7148089257949884e-05, "loss": 0.9129, "step": 1910 }, { "epoch": 0.2692497358224727, "grad_norm": 1.2478983601466498, "learning_rate": 1.7144897087236002e-05, "loss": 0.4803, "step": 1911 }, { "epoch": 0.2693906305036985, "grad_norm": 1.1943053820840774, "learning_rate": 1.714170342848422e-05, "loss": 0.4469, "step": 1912 }, { "epoch": 0.2695315251849243, "grad_norm": 1.3286977152805992, "learning_rate": 1.7138508282359667e-05, "loss": 0.487, "step": 1913 }, { "epoch": 0.26967241986615004, "grad_norm": 1.2465908966810018, "learning_rate": 1.7135311649527787e-05, "loss": 0.482, "step": 1914 }, { "epoch": 0.26981331454737584, "grad_norm": 1.239727692550265, "learning_rate": 1.7132113530654327e-05, "loss": 0.6317, "step": 1915 }, { "epoch": 0.26995420922860164, "grad_norm": 1.001863205236597, "learning_rate": 1.712891392640535e-05, "loss": 0.9034, "step": 1916 }, { "epoch": 0.2700951039098274, "grad_norm": 1.0966047660655345, "learning_rate": 1.7125712837447215e-05, "loss": 0.9494, "step": 1917 }, { "epoch": 0.2702359985910532, "grad_norm": 1.3518535281756539, "learning_rate": 1.712251026444661e-05, "loss": 0.5162, "step": 1918 }, { "epoch": 0.270376893272279, "grad_norm": 0.9510972769014768, "learning_rate": 1.7119306208070513e-05, "loss": 0.8851, "step": 1919 }, { "epoch": 0.27051778795350473, "grad_norm": 1.2626715871121224, "learning_rate": 1.7116100668986226e-05, "loss": 0.5257, "step": 1920 }, { "epoch": 0.27065868263473053, "grad_norm": 2.6718141369444575, "learning_rate": 1.711289364786135e-05, "loss": 0.5557, "step": 1921 }, { "epoch": 0.27079957731595633, "grad_norm": 1.264005746215085, "learning_rate": 1.7109685145363795e-05, "loss": 0.4842, "step": 1922 }, { "epoch": 0.27094047199718213, "grad_norm": 1.0240023117880088, "learning_rate": 1.710647516216179e-05, "loss": 0.9492, "step": 1923 }, { "epoch": 0.2710813666784079, "grad_norm": 1.1077769386246035, "learning_rate": 1.710326369892386e-05, "loss": 0.9306, "step": 1924 }, { "epoch": 0.2712222613596337, "grad_norm": 0.9413517502642468, "learning_rate": 1.710005075631884e-05, "loss": 0.896, "step": 1925 }, { "epoch": 0.2713631560408595, "grad_norm": 1.084524249993048, "learning_rate": 1.7096836335015887e-05, "loss": 0.8932, "step": 1926 }, { "epoch": 0.2715040507220852, "grad_norm": 1.4909580592157479, "learning_rate": 1.709362043568444e-05, "loss": 0.5052, "step": 1927 }, { "epoch": 0.271644945403311, "grad_norm": 1.4661598596548933, "learning_rate": 1.7090403058994272e-05, "loss": 0.5132, "step": 1928 }, { "epoch": 0.2717858400845368, "grad_norm": 1.4020681715905845, "learning_rate": 1.708718420561545e-05, "loss": 0.5797, "step": 1929 }, { "epoch": 0.27192673476576257, "grad_norm": 1.4018106082604915, "learning_rate": 1.708396387621835e-05, "loss": 0.5883, "step": 1930 }, { "epoch": 0.27206762944698837, "grad_norm": 1.2238949958969365, "learning_rate": 1.708074207147366e-05, "loss": 0.4843, "step": 1931 }, { "epoch": 0.27220852412821417, "grad_norm": 1.3711120325066462, "learning_rate": 1.7077518792052364e-05, "loss": 0.6534, "step": 1932 }, { "epoch": 0.27234941880943997, "grad_norm": 0.9855198916237632, "learning_rate": 1.7074294038625767e-05, "loss": 0.8528, "step": 1933 }, { "epoch": 0.2724903134906657, "grad_norm": 1.0541321114123008, "learning_rate": 1.7071067811865477e-05, "loss": 0.8924, "step": 1934 }, { "epoch": 0.2726312081718915, "grad_norm": 1.1984929622920815, "learning_rate": 1.7067840112443402e-05, "loss": 0.9595, "step": 1935 }, { "epoch": 0.2727721028531173, "grad_norm": 1.0580803519068265, "learning_rate": 1.7064610941031764e-05, "loss": 0.9664, "step": 1936 }, { "epoch": 0.27291299753434306, "grad_norm": 0.9129580064887708, "learning_rate": 1.7061380298303084e-05, "loss": 0.8823, "step": 1937 }, { "epoch": 0.27305389221556886, "grad_norm": 1.6129698500939227, "learning_rate": 1.7058148184930203e-05, "loss": 0.6026, "step": 1938 }, { "epoch": 0.27319478689679466, "grad_norm": 1.2790071917205685, "learning_rate": 1.7054914601586255e-05, "loss": 0.5152, "step": 1939 }, { "epoch": 0.2733356815780204, "grad_norm": 0.9646490545417452, "learning_rate": 1.7051679548944683e-05, "loss": 0.9174, "step": 1940 }, { "epoch": 0.2734765762592462, "grad_norm": 0.9771212370506716, "learning_rate": 1.7048443027679243e-05, "loss": 0.9481, "step": 1941 }, { "epoch": 0.273617470940472, "grad_norm": 1.089956294258487, "learning_rate": 1.7045205038463984e-05, "loss": 0.9177, "step": 1942 }, { "epoch": 0.2737583656216978, "grad_norm": 1.1305039545546718, "learning_rate": 1.704196558197328e-05, "loss": 0.9395, "step": 1943 }, { "epoch": 0.27389926030292355, "grad_norm": 1.034408893073874, "learning_rate": 1.703872465888179e-05, "loss": 0.8973, "step": 1944 }, { "epoch": 0.27404015498414935, "grad_norm": 0.9710539426319528, "learning_rate": 1.703548226986449e-05, "loss": 0.8961, "step": 1945 }, { "epoch": 0.27418104966537515, "grad_norm": 1.0246144417877825, "learning_rate": 1.703223841559666e-05, "loss": 0.9086, "step": 1946 }, { "epoch": 0.2743219443466009, "grad_norm": 0.9950586183734221, "learning_rate": 1.702899309675388e-05, "loss": 0.9666, "step": 1947 }, { "epoch": 0.2744628390278267, "grad_norm": 1.1235273444885014, "learning_rate": 1.7025746314012045e-05, "loss": 0.885, "step": 1948 }, { "epoch": 0.2746037337090525, "grad_norm": 1.0636976897160029, "learning_rate": 1.7022498068047343e-05, "loss": 0.9099, "step": 1949 }, { "epoch": 0.27474462839027824, "grad_norm": 1.0215712343239336, "learning_rate": 1.7019248359536276e-05, "loss": 0.9558, "step": 1950 }, { "epoch": 0.27488552307150405, "grad_norm": 1.414035392060044, "learning_rate": 1.701599718915565e-05, "loss": 0.4984, "step": 1951 }, { "epoch": 0.27502641775272985, "grad_norm": 1.0503532920191063, "learning_rate": 1.701274455758257e-05, "loss": 0.959, "step": 1952 }, { "epoch": 0.2751673124339556, "grad_norm": 1.223949477000264, "learning_rate": 1.7009490465494444e-05, "loss": 0.9752, "step": 1953 }, { "epoch": 0.2753082071151814, "grad_norm": 1.1324782503634307, "learning_rate": 1.700623491356899e-05, "loss": 0.9106, "step": 1954 }, { "epoch": 0.2754491017964072, "grad_norm": 1.2857139393878685, "learning_rate": 1.7002977902484234e-05, "loss": 0.445, "step": 1955 }, { "epoch": 0.275589996477633, "grad_norm": 0.9392345378699586, "learning_rate": 1.6999719432918497e-05, "loss": 0.9235, "step": 1956 }, { "epoch": 0.27573089115885874, "grad_norm": 1.072729653057753, "learning_rate": 1.69964595055504e-05, "loss": 0.9165, "step": 1957 }, { "epoch": 0.27587178584008454, "grad_norm": 1.5602304874850086, "learning_rate": 1.6993198121058884e-05, "loss": 0.566, "step": 1958 }, { "epoch": 0.27601268052131034, "grad_norm": 0.8992812246883994, "learning_rate": 1.6989935280123178e-05, "loss": 0.889, "step": 1959 }, { "epoch": 0.2761535752025361, "grad_norm": 1.0241957714967003, "learning_rate": 1.6986670983422825e-05, "loss": 0.8924, "step": 1960 }, { "epoch": 0.2762944698837619, "grad_norm": 1.1448185139681275, "learning_rate": 1.698340523163766e-05, "loss": 0.9097, "step": 1961 }, { "epoch": 0.2764353645649877, "grad_norm": 1.0181136479470974, "learning_rate": 1.6980138025447835e-05, "loss": 0.8957, "step": 1962 }, { "epoch": 0.27657625924621343, "grad_norm": 0.9556651081026695, "learning_rate": 1.6976869365533794e-05, "loss": 0.9133, "step": 1963 }, { "epoch": 0.27671715392743923, "grad_norm": 1.0272124448138091, "learning_rate": 1.6973599252576284e-05, "loss": 0.9483, "step": 1964 }, { "epoch": 0.27685804860866503, "grad_norm": 1.2559668426907205, "learning_rate": 1.6970327687256367e-05, "loss": 0.534, "step": 1965 }, { "epoch": 0.27699894328989083, "grad_norm": 1.0326504859635923, "learning_rate": 1.6967054670255385e-05, "loss": 0.8818, "step": 1966 }, { "epoch": 0.2771398379711166, "grad_norm": 1.0014214961955812, "learning_rate": 1.6963780202255005e-05, "loss": 0.9227, "step": 1967 }, { "epoch": 0.2772807326523424, "grad_norm": 1.0035762547425293, "learning_rate": 1.6960504283937186e-05, "loss": 0.8949, "step": 1968 }, { "epoch": 0.2774216273335682, "grad_norm": 1.339076822168353, "learning_rate": 1.6957226915984186e-05, "loss": 0.615, "step": 1969 }, { "epoch": 0.2775625220147939, "grad_norm": 1.2271659502178858, "learning_rate": 1.695394809907857e-05, "loss": 0.4962, "step": 1970 }, { "epoch": 0.2777034166960197, "grad_norm": 0.9823883048286705, "learning_rate": 1.6950667833903205e-05, "loss": 0.8647, "step": 1971 }, { "epoch": 0.2778443113772455, "grad_norm": 0.9563370317747684, "learning_rate": 1.6947386121141258e-05, "loss": 0.9256, "step": 1972 }, { "epoch": 0.27798520605847127, "grad_norm": 1.1333310939717347, "learning_rate": 1.69441029614762e-05, "loss": 0.9422, "step": 1973 }, { "epoch": 0.27812610073969707, "grad_norm": 1.2884292801993504, "learning_rate": 1.6940818355591795e-05, "loss": 0.5956, "step": 1974 }, { "epoch": 0.27826699542092287, "grad_norm": 1.142128842584375, "learning_rate": 1.6937532304172116e-05, "loss": 0.4622, "step": 1975 }, { "epoch": 0.27840789010214867, "grad_norm": 1.0617684250089223, "learning_rate": 1.6934244807901545e-05, "loss": 0.9663, "step": 1976 }, { "epoch": 0.2785487847833744, "grad_norm": 0.9448001306212147, "learning_rate": 1.693095586746474e-05, "loss": 0.952, "step": 1977 }, { "epoch": 0.2786896794646002, "grad_norm": 1.0414357459383297, "learning_rate": 1.692766548354668e-05, "loss": 0.8834, "step": 1978 }, { "epoch": 0.278830574145826, "grad_norm": 1.0182341781089077, "learning_rate": 1.6924373656832644e-05, "loss": 0.9376, "step": 1979 }, { "epoch": 0.27897146882705176, "grad_norm": 1.1419663228180765, "learning_rate": 1.6921080388008204e-05, "loss": 0.4615, "step": 1980 }, { "epoch": 0.27911236350827756, "grad_norm": 1.2702779440176937, "learning_rate": 1.6917785677759232e-05, "loss": 0.4883, "step": 1981 }, { "epoch": 0.27925325818950336, "grad_norm": 0.9085794921348185, "learning_rate": 1.691448952677191e-05, "loss": 0.8847, "step": 1982 }, { "epoch": 0.2793941528707291, "grad_norm": 0.9963653102396717, "learning_rate": 1.691119193573271e-05, "loss": 0.8267, "step": 1983 }, { "epoch": 0.2795350475519549, "grad_norm": 1.4578430566458718, "learning_rate": 1.690789290532841e-05, "loss": 0.5683, "step": 1984 }, { "epoch": 0.2796759422331807, "grad_norm": 1.3210086744934124, "learning_rate": 1.690459243624608e-05, "loss": 0.8945, "step": 1985 }, { "epoch": 0.2798168369144065, "grad_norm": 1.1834038233700455, "learning_rate": 1.69012905291731e-05, "loss": 0.8936, "step": 1986 }, { "epoch": 0.27995773159563225, "grad_norm": 1.0365796115580463, "learning_rate": 1.6897987184797142e-05, "loss": 0.9323, "step": 1987 }, { "epoch": 0.28009862627685805, "grad_norm": 0.9589994858722005, "learning_rate": 1.6894682403806183e-05, "loss": 0.9345, "step": 1988 }, { "epoch": 0.28023952095808385, "grad_norm": 0.9997673015557954, "learning_rate": 1.6891376186888492e-05, "loss": 0.9026, "step": 1989 }, { "epoch": 0.2803804156393096, "grad_norm": 1.4743058500662485, "learning_rate": 1.6888068534732643e-05, "loss": 0.6193, "step": 1990 }, { "epoch": 0.2805213103205354, "grad_norm": 0.992932603941243, "learning_rate": 1.6884759448027506e-05, "loss": 0.895, "step": 1991 }, { "epoch": 0.2806622050017612, "grad_norm": 0.9632565744712079, "learning_rate": 1.6881448927462248e-05, "loss": 0.9095, "step": 1992 }, { "epoch": 0.28080309968298695, "grad_norm": 1.3954204641850678, "learning_rate": 1.6878136973726347e-05, "loss": 0.4864, "step": 1993 }, { "epoch": 0.28094399436421275, "grad_norm": 1.1200885494769186, "learning_rate": 1.687482358750956e-05, "loss": 0.9259, "step": 1994 }, { "epoch": 0.28108488904543855, "grad_norm": 1.0550300158661672, "learning_rate": 1.6871508769501953e-05, "loss": 0.8879, "step": 1995 }, { "epoch": 0.2812257837266643, "grad_norm": 1.0306167520840739, "learning_rate": 1.6868192520393892e-05, "loss": 0.9563, "step": 1996 }, { "epoch": 0.2813666784078901, "grad_norm": 1.016487540794708, "learning_rate": 1.6864874840876045e-05, "loss": 0.8756, "step": 1997 }, { "epoch": 0.2815075730891159, "grad_norm": 0.9123210288965791, "learning_rate": 1.6861555731639353e-05, "loss": 0.9095, "step": 1998 }, { "epoch": 0.2816484677703417, "grad_norm": 0.9407119464049905, "learning_rate": 1.685823519337509e-05, "loss": 0.8931, "step": 1999 }, { "epoch": 0.28178936245156744, "grad_norm": 0.9564073750449064, "learning_rate": 1.685491322677481e-05, "loss": 0.8983, "step": 2000 }, { "epoch": 0.28193025713279324, "grad_norm": 1.318717434910594, "learning_rate": 1.685158983253035e-05, "loss": 0.5544, "step": 2001 }, { "epoch": 0.28207115181401904, "grad_norm": 0.9672349374964933, "learning_rate": 1.6848265011333874e-05, "loss": 0.9351, "step": 2002 }, { "epoch": 0.2822120464952448, "grad_norm": 1.2154710984197836, "learning_rate": 1.6844938763877828e-05, "loss": 0.4766, "step": 2003 }, { "epoch": 0.2823529411764706, "grad_norm": 1.0271136240453884, "learning_rate": 1.6841611090854944e-05, "loss": 0.9083, "step": 2004 }, { "epoch": 0.2824938358576964, "grad_norm": 0.9763400749551777, "learning_rate": 1.6838281992958275e-05, "loss": 0.8572, "step": 2005 }, { "epoch": 0.28263473053892213, "grad_norm": 1.1989470598949226, "learning_rate": 1.6834951470881153e-05, "loss": 0.4762, "step": 2006 }, { "epoch": 0.28277562522014793, "grad_norm": 1.0131845902805205, "learning_rate": 1.683161952531721e-05, "loss": 0.889, "step": 2007 }, { "epoch": 0.28291651990137373, "grad_norm": 1.4851559425018195, "learning_rate": 1.6828286156960383e-05, "loss": 0.5711, "step": 2008 }, { "epoch": 0.28305741458259953, "grad_norm": 1.1133733775502095, "learning_rate": 1.6824951366504897e-05, "loss": 0.911, "step": 2009 }, { "epoch": 0.2831983092638253, "grad_norm": 1.2066323329533448, "learning_rate": 1.6821615154645264e-05, "loss": 0.9158, "step": 2010 }, { "epoch": 0.2833392039450511, "grad_norm": 1.3706395690117337, "learning_rate": 1.681827752207632e-05, "loss": 0.6382, "step": 2011 }, { "epoch": 0.2834800986262769, "grad_norm": 1.0613882900605554, "learning_rate": 1.6814938469493166e-05, "loss": 0.9141, "step": 2012 }, { "epoch": 0.2836209933075026, "grad_norm": 1.202171444078377, "learning_rate": 1.681159799759122e-05, "loss": 0.502, "step": 2013 }, { "epoch": 0.2837618879887284, "grad_norm": 1.0385971696908054, "learning_rate": 1.6808256107066187e-05, "loss": 0.929, "step": 2014 }, { "epoch": 0.2839027826699542, "grad_norm": 0.9293776061393323, "learning_rate": 1.6804912798614066e-05, "loss": 0.9306, "step": 2015 }, { "epoch": 0.28404367735117997, "grad_norm": 1.3099854908044761, "learning_rate": 1.6801568072931154e-05, "loss": 0.6096, "step": 2016 }, { "epoch": 0.28418457203240577, "grad_norm": 1.0315367352731057, "learning_rate": 1.6798221930714045e-05, "loss": 0.9571, "step": 2017 }, { "epoch": 0.28432546671363157, "grad_norm": 1.3012991635175537, "learning_rate": 1.679487437265963e-05, "loss": 0.9033, "step": 2018 }, { "epoch": 0.28446636139485737, "grad_norm": 1.136377120664204, "learning_rate": 1.6791525399465083e-05, "loss": 0.8943, "step": 2019 }, { "epoch": 0.2846072560760831, "grad_norm": 1.0262979902180536, "learning_rate": 1.6788175011827885e-05, "loss": 0.9477, "step": 2020 }, { "epoch": 0.2847481507573089, "grad_norm": 1.042654659380883, "learning_rate": 1.6784823210445805e-05, "loss": 0.849, "step": 2021 }, { "epoch": 0.2848890454385347, "grad_norm": 1.1406854477666628, "learning_rate": 1.678146999601691e-05, "loss": 0.9442, "step": 2022 }, { "epoch": 0.28502994011976046, "grad_norm": 1.316133836123371, "learning_rate": 1.6778115369239563e-05, "loss": 0.5137, "step": 2023 }, { "epoch": 0.28517083480098626, "grad_norm": 1.1170955093662682, "learning_rate": 1.6774759330812413e-05, "loss": 0.935, "step": 2024 }, { "epoch": 0.28531172948221206, "grad_norm": 1.0344095232972905, "learning_rate": 1.677140188143441e-05, "loss": 0.8207, "step": 2025 }, { "epoch": 0.2854526241634378, "grad_norm": 1.4085288367719808, "learning_rate": 1.67680430218048e-05, "loss": 0.6023, "step": 2026 }, { "epoch": 0.2855935188446636, "grad_norm": 0.9874000258258876, "learning_rate": 1.6764682752623112e-05, "loss": 0.8881, "step": 2027 }, { "epoch": 0.2857344135258894, "grad_norm": 1.0419212674383531, "learning_rate": 1.6761321074589178e-05, "loss": 0.9311, "step": 2028 }, { "epoch": 0.2858753082071152, "grad_norm": 1.014806354708108, "learning_rate": 1.6757957988403122e-05, "loss": 0.9114, "step": 2029 }, { "epoch": 0.28601620288834095, "grad_norm": 0.9894706503927616, "learning_rate": 1.675459349476536e-05, "loss": 0.9095, "step": 2030 }, { "epoch": 0.28615709756956675, "grad_norm": 1.055929493082528, "learning_rate": 1.67512275943766e-05, "loss": 0.92, "step": 2031 }, { "epoch": 0.28629799225079255, "grad_norm": 0.899565361125418, "learning_rate": 1.674786028793784e-05, "loss": 0.9302, "step": 2032 }, { "epoch": 0.2864388869320183, "grad_norm": 1.3936196363771616, "learning_rate": 1.674449157615038e-05, "loss": 0.5186, "step": 2033 }, { "epoch": 0.2865797816132441, "grad_norm": 1.0167928592831146, "learning_rate": 1.674112145971581e-05, "loss": 0.8844, "step": 2034 }, { "epoch": 0.2867206762944699, "grad_norm": 1.0144871971109928, "learning_rate": 1.6737749939336003e-05, "loss": 0.8949, "step": 2035 }, { "epoch": 0.28686157097569565, "grad_norm": 0.8975381927161992, "learning_rate": 1.6734377015713136e-05, "loss": 0.9009, "step": 2036 }, { "epoch": 0.28700246565692145, "grad_norm": 1.0123635500119574, "learning_rate": 1.6731002689549673e-05, "loss": 0.8848, "step": 2037 }, { "epoch": 0.28714336033814725, "grad_norm": 1.3777501641553171, "learning_rate": 1.672762696154837e-05, "loss": 0.5559, "step": 2038 }, { "epoch": 0.287284255019373, "grad_norm": 1.0155213403521341, "learning_rate": 1.6724249832412277e-05, "loss": 0.8858, "step": 2039 }, { "epoch": 0.2874251497005988, "grad_norm": 0.945699875145151, "learning_rate": 1.6720871302844733e-05, "loss": 0.8896, "step": 2040 }, { "epoch": 0.2875660443818246, "grad_norm": 1.0532600784367205, "learning_rate": 1.6717491373549376e-05, "loss": 0.9033, "step": 2041 }, { "epoch": 0.2877069390630504, "grad_norm": 1.0139998992145791, "learning_rate": 1.671411004523012e-05, "loss": 0.8801, "step": 2042 }, { "epoch": 0.28784783374427614, "grad_norm": 2.058658811044355, "learning_rate": 1.6710727318591182e-05, "loss": 0.5176, "step": 2043 }, { "epoch": 0.28798872842550194, "grad_norm": 1.140903805123843, "learning_rate": 1.6707343194337076e-05, "loss": 0.8884, "step": 2044 }, { "epoch": 0.28812962310672774, "grad_norm": 1.0292641357774361, "learning_rate": 1.670395767317259e-05, "loss": 0.909, "step": 2045 }, { "epoch": 0.2882705177879535, "grad_norm": 1.0164327500302897, "learning_rate": 1.6700570755802825e-05, "loss": 0.9354, "step": 2046 }, { "epoch": 0.2884114124691793, "grad_norm": 1.0505759468203317, "learning_rate": 1.6697182442933146e-05, "loss": 0.9674, "step": 2047 }, { "epoch": 0.2885523071504051, "grad_norm": 1.2336580543112532, "learning_rate": 1.669379273526923e-05, "loss": 0.7036, "step": 2048 }, { "epoch": 0.28869320183163083, "grad_norm": 1.0249720739411876, "learning_rate": 1.669040163351704e-05, "loss": 0.9311, "step": 2049 }, { "epoch": 0.28883409651285663, "grad_norm": 1.3100395098325444, "learning_rate": 1.6687009138382818e-05, "loss": 0.4917, "step": 2050 }, { "epoch": 0.28897499119408243, "grad_norm": 1.0551741400942973, "learning_rate": 1.6683615250573117e-05, "loss": 0.9019, "step": 2051 }, { "epoch": 0.28911588587530823, "grad_norm": 1.1984432369913067, "learning_rate": 1.6680219970794753e-05, "loss": 0.5049, "step": 2052 }, { "epoch": 0.289256780556534, "grad_norm": 1.0989493197121334, "learning_rate": 1.6676823299754856e-05, "loss": 0.8949, "step": 2053 }, { "epoch": 0.2893976752377598, "grad_norm": 1.257832671176372, "learning_rate": 1.6673425238160837e-05, "loss": 0.4998, "step": 2054 }, { "epoch": 0.2895385699189856, "grad_norm": 1.0810960422536422, "learning_rate": 1.6670025786720393e-05, "loss": 0.9381, "step": 2055 }, { "epoch": 0.2896794646002113, "grad_norm": 1.7398848899617092, "learning_rate": 1.6666624946141514e-05, "loss": 0.9109, "step": 2056 }, { "epoch": 0.2898203592814371, "grad_norm": 1.266516344520233, "learning_rate": 1.666322271713248e-05, "loss": 0.5259, "step": 2057 }, { "epoch": 0.2899612539626629, "grad_norm": 1.3345354872548745, "learning_rate": 1.665981910040186e-05, "loss": 0.5629, "step": 2058 }, { "epoch": 0.29010214864388867, "grad_norm": 1.1267982640344638, "learning_rate": 1.6656414096658508e-05, "loss": 0.8841, "step": 2059 }, { "epoch": 0.29024304332511447, "grad_norm": 1.042025671010151, "learning_rate": 1.665300770661157e-05, "loss": 0.9419, "step": 2060 }, { "epoch": 0.29038393800634027, "grad_norm": 1.7227701329453877, "learning_rate": 1.6649599930970485e-05, "loss": 0.5675, "step": 2061 }, { "epoch": 0.29052483268756607, "grad_norm": 1.2275979230395455, "learning_rate": 1.6646190770444966e-05, "loss": 0.4773, "step": 2062 }, { "epoch": 0.2906657273687918, "grad_norm": 1.0046581980425353, "learning_rate": 1.6642780225745042e-05, "loss": 0.9191, "step": 2063 }, { "epoch": 0.2908066220500176, "grad_norm": 1.521338090911073, "learning_rate": 1.6639368297581e-05, "loss": 0.575, "step": 2064 }, { "epoch": 0.2909475167312434, "grad_norm": 0.927339887889469, "learning_rate": 1.6635954986663427e-05, "loss": 0.8635, "step": 2065 }, { "epoch": 0.29108841141246916, "grad_norm": 1.5544290188038425, "learning_rate": 1.6632540293703206e-05, "loss": 0.5614, "step": 2066 }, { "epoch": 0.29122930609369496, "grad_norm": 1.445540027993148, "learning_rate": 1.6629124219411497e-05, "loss": 0.5429, "step": 2067 }, { "epoch": 0.29137020077492076, "grad_norm": 1.0603732413989115, "learning_rate": 1.662570676449975e-05, "loss": 0.8722, "step": 2068 }, { "epoch": 0.2915110954561465, "grad_norm": 1.03234655513991, "learning_rate": 1.662228792967971e-05, "loss": 0.9452, "step": 2069 }, { "epoch": 0.2916519901373723, "grad_norm": 1.2162118241126778, "learning_rate": 1.66188677156634e-05, "loss": 0.5266, "step": 2070 }, { "epoch": 0.2917928848185981, "grad_norm": 1.1317793225999206, "learning_rate": 1.661544612316313e-05, "loss": 0.901, "step": 2071 }, { "epoch": 0.2919337794998239, "grad_norm": 1.2770194798032326, "learning_rate": 1.6612023152891506e-05, "loss": 0.5217, "step": 2072 }, { "epoch": 0.29207467418104965, "grad_norm": 0.9796482841089629, "learning_rate": 1.6608598805561415e-05, "loss": 0.8891, "step": 2073 }, { "epoch": 0.29221556886227545, "grad_norm": 0.888814355679372, "learning_rate": 1.6605173081886027e-05, "loss": 0.9217, "step": 2074 }, { "epoch": 0.29235646354350125, "grad_norm": 1.0311650303105155, "learning_rate": 1.6601745982578812e-05, "loss": 0.9314, "step": 2075 }, { "epoch": 0.292497358224727, "grad_norm": 1.062663221220319, "learning_rate": 1.659831750835351e-05, "loss": 0.9419, "step": 2076 }, { "epoch": 0.2926382529059528, "grad_norm": 1.2173148499543451, "learning_rate": 1.6594887659924156e-05, "loss": 0.4777, "step": 2077 }, { "epoch": 0.2927791475871786, "grad_norm": 1.1199626235072817, "learning_rate": 1.6591456438005072e-05, "loss": 0.5101, "step": 2078 }, { "epoch": 0.29292004226840435, "grad_norm": 1.1171736371508172, "learning_rate": 1.658802384331087e-05, "loss": 0.4489, "step": 2079 }, { "epoch": 0.29306093694963015, "grad_norm": 1.2851789271808083, "learning_rate": 1.6584589876556427e-05, "loss": 0.6716, "step": 2080 }, { "epoch": 0.29320183163085595, "grad_norm": 1.3506051833154136, "learning_rate": 1.6581154538456938e-05, "loss": 0.4726, "step": 2081 }, { "epoch": 0.2933427263120817, "grad_norm": 1.0081256244838495, "learning_rate": 1.6577717829727853e-05, "loss": 0.8982, "step": 2082 }, { "epoch": 0.2934836209933075, "grad_norm": 0.9728900860673505, "learning_rate": 1.6574279751084925e-05, "loss": 0.878, "step": 2083 }, { "epoch": 0.2936245156745333, "grad_norm": 1.2189428038002224, "learning_rate": 1.6570840303244192e-05, "loss": 0.9982, "step": 2084 }, { "epoch": 0.2937654103557591, "grad_norm": 1.1182363433352722, "learning_rate": 1.6567399486921973e-05, "loss": 0.8957, "step": 2085 }, { "epoch": 0.29390630503698484, "grad_norm": 1.1248365118117565, "learning_rate": 1.656395730283487e-05, "loss": 0.8629, "step": 2086 }, { "epoch": 0.29404719971821064, "grad_norm": 1.2975735507854977, "learning_rate": 1.6560513751699773e-05, "loss": 0.524, "step": 2087 }, { "epoch": 0.29418809439943644, "grad_norm": 0.9282990272895054, "learning_rate": 1.6557068834233856e-05, "loss": 0.8889, "step": 2088 }, { "epoch": 0.2943289890806622, "grad_norm": 1.06725647256025, "learning_rate": 1.6553622551154578e-05, "loss": 0.9297, "step": 2089 }, { "epoch": 0.294469883761888, "grad_norm": 1.0691821188211532, "learning_rate": 1.655017490317968e-05, "loss": 0.9516, "step": 2090 }, { "epoch": 0.2946107784431138, "grad_norm": 0.9371952964722146, "learning_rate": 1.6546725891027192e-05, "loss": 0.8808, "step": 2091 }, { "epoch": 0.29475167312433953, "grad_norm": 1.1236130811725464, "learning_rate": 1.6543275515415428e-05, "loss": 0.93, "step": 2092 }, { "epoch": 0.29489256780556533, "grad_norm": 1.3520250015090243, "learning_rate": 1.6539823777062974e-05, "loss": 0.8544, "step": 2093 }, { "epoch": 0.29503346248679113, "grad_norm": 1.2376381395498366, "learning_rate": 1.6536370676688717e-05, "loss": 0.5373, "step": 2094 }, { "epoch": 0.29517435716801693, "grad_norm": 0.9790993514323942, "learning_rate": 1.6532916215011815e-05, "loss": 0.8708, "step": 2095 }, { "epoch": 0.2953152518492427, "grad_norm": 1.0118683642701445, "learning_rate": 1.652946039275172e-05, "loss": 0.9507, "step": 2096 }, { "epoch": 0.2954561465304685, "grad_norm": 1.2482719884244085, "learning_rate": 1.6526003210628155e-05, "loss": 0.5256, "step": 2097 }, { "epoch": 0.2955970412116943, "grad_norm": 0.8910129797746037, "learning_rate": 1.652254466936114e-05, "loss": 0.8433, "step": 2098 }, { "epoch": 0.29573793589292, "grad_norm": 1.0470812360788762, "learning_rate": 1.6519084769670967e-05, "loss": 0.9312, "step": 2099 }, { "epoch": 0.2958788305741458, "grad_norm": 1.0700191716545107, "learning_rate": 1.6515623512278213e-05, "loss": 0.9232, "step": 2100 }, { "epoch": 0.2960197252553716, "grad_norm": 1.228087191545561, "learning_rate": 1.6512160897903745e-05, "loss": 0.4888, "step": 2101 }, { "epoch": 0.29616061993659737, "grad_norm": 1.5122614951189608, "learning_rate": 1.65086969272687e-05, "loss": 0.5475, "step": 2102 }, { "epoch": 0.29630151461782317, "grad_norm": 1.0602085081870347, "learning_rate": 1.6505231601094512e-05, "loss": 0.8925, "step": 2103 }, { "epoch": 0.29644240929904897, "grad_norm": 1.1955498437407401, "learning_rate": 1.6501764920102888e-05, "loss": 0.9813, "step": 2104 }, { "epoch": 0.29658330398027477, "grad_norm": 1.0996992112522896, "learning_rate": 1.6498296885015815e-05, "loss": 0.8812, "step": 2105 }, { "epoch": 0.2967241986615005, "grad_norm": 0.9409011196199354, "learning_rate": 1.6494827496555573e-05, "loss": 0.898, "step": 2106 }, { "epoch": 0.2968650933427263, "grad_norm": 1.4317588444271456, "learning_rate": 1.649135675544471e-05, "loss": 0.4877, "step": 2107 }, { "epoch": 0.2970059880239521, "grad_norm": 1.0260129992460758, "learning_rate": 1.6487884662406065e-05, "loss": 0.8945, "step": 2108 }, { "epoch": 0.29714688270517786, "grad_norm": 1.2768660744025049, "learning_rate": 1.648441121816276e-05, "loss": 0.5528, "step": 2109 }, { "epoch": 0.29728777738640366, "grad_norm": 0.955575309727979, "learning_rate": 1.648093642343819e-05, "loss": 0.9327, "step": 2110 }, { "epoch": 0.29742867206762946, "grad_norm": 1.0553485028636633, "learning_rate": 1.647746027895604e-05, "loss": 0.9166, "step": 2111 }, { "epoch": 0.2975695667488552, "grad_norm": 1.268743944929167, "learning_rate": 1.647398278544027e-05, "loss": 0.4886, "step": 2112 }, { "epoch": 0.297710461430081, "grad_norm": 1.021996568871496, "learning_rate": 1.6470503943615123e-05, "loss": 0.9541, "step": 2113 }, { "epoch": 0.2978513561113068, "grad_norm": 1.0769897971308604, "learning_rate": 1.6467023754205122e-05, "loss": 0.9538, "step": 2114 }, { "epoch": 0.2979922507925326, "grad_norm": 1.0039901901416979, "learning_rate": 1.6463542217935073e-05, "loss": 0.8643, "step": 2115 }, { "epoch": 0.29813314547375835, "grad_norm": 1.1813282529967362, "learning_rate": 1.6460059335530062e-05, "loss": 0.937, "step": 2116 }, { "epoch": 0.29827404015498415, "grad_norm": 1.3397930080664846, "learning_rate": 1.645657510771545e-05, "loss": 0.615, "step": 2117 }, { "epoch": 0.29841493483620996, "grad_norm": 0.9283345275476477, "learning_rate": 1.645308953521689e-05, "loss": 0.8671, "step": 2118 }, { "epoch": 0.2985558295174357, "grad_norm": 1.2383947343150399, "learning_rate": 1.64496026187603e-05, "loss": 0.5495, "step": 2119 }, { "epoch": 0.2986967241986615, "grad_norm": 0.9193247022238701, "learning_rate": 1.6446114359071893e-05, "loss": 0.869, "step": 2120 }, { "epoch": 0.2988376188798873, "grad_norm": 1.0902851838084666, "learning_rate": 1.6442624756878145e-05, "loss": 0.9777, "step": 2121 }, { "epoch": 0.29897851356111305, "grad_norm": 1.4406518024856831, "learning_rate": 1.6439133812905827e-05, "loss": 0.6495, "step": 2122 }, { "epoch": 0.29911940824233885, "grad_norm": 1.3249121008154976, "learning_rate": 1.6435641527881986e-05, "loss": 0.5315, "step": 2123 }, { "epoch": 0.29926030292356465, "grad_norm": 0.9894637957945245, "learning_rate": 1.643214790253394e-05, "loss": 0.8437, "step": 2124 }, { "epoch": 0.2994011976047904, "grad_norm": 1.0557773869333618, "learning_rate": 1.6428652937589293e-05, "loss": 0.8917, "step": 2125 }, { "epoch": 0.2995420922860162, "grad_norm": 1.0272642283352447, "learning_rate": 1.642515663377593e-05, "loss": 0.9785, "step": 2126 }, { "epoch": 0.299682986967242, "grad_norm": 1.3139944295656323, "learning_rate": 1.6421658991822005e-05, "loss": 0.9018, "step": 2127 }, { "epoch": 0.2998238816484678, "grad_norm": 1.0237308289968599, "learning_rate": 1.6418160012455964e-05, "loss": 0.8736, "step": 2128 }, { "epoch": 0.29996477632969354, "grad_norm": 1.1159210500486476, "learning_rate": 1.641465969640652e-05, "loss": 0.9242, "step": 2129 }, { "epoch": 0.30010567101091934, "grad_norm": 1.4283941245829477, "learning_rate": 1.6411158044402675e-05, "loss": 0.5553, "step": 2130 }, { "epoch": 0.30024656569214514, "grad_norm": 1.0449888651238604, "learning_rate": 1.64076550571737e-05, "loss": 0.8835, "step": 2131 }, { "epoch": 0.3003874603733709, "grad_norm": 1.2551265719847629, "learning_rate": 1.640415073544914e-05, "loss": 0.4981, "step": 2132 }, { "epoch": 0.3005283550545967, "grad_norm": 1.165578742253952, "learning_rate": 1.6400645079958843e-05, "loss": 0.9052, "step": 2133 }, { "epoch": 0.3006692497358225, "grad_norm": 1.1804623703609478, "learning_rate": 1.6397138091432904e-05, "loss": 0.5742, "step": 2134 }, { "epoch": 0.30081014441704823, "grad_norm": 1.129736794781232, "learning_rate": 1.639362977060171e-05, "loss": 0.8457, "step": 2135 }, { "epoch": 0.30095103909827403, "grad_norm": 0.9939676395822216, "learning_rate": 1.6390120118195925e-05, "loss": 0.8639, "step": 2136 }, { "epoch": 0.30109193377949983, "grad_norm": 1.3189544218754086, "learning_rate": 1.6386609134946492e-05, "loss": 0.4563, "step": 2137 }, { "epoch": 0.30123282846072563, "grad_norm": 1.0224188059819517, "learning_rate": 1.6383096821584626e-05, "loss": 0.8834, "step": 2138 }, { "epoch": 0.3013737231419514, "grad_norm": 0.9316562232065035, "learning_rate": 1.637958317884183e-05, "loss": 0.8872, "step": 2139 }, { "epoch": 0.3015146178231772, "grad_norm": 0.9286971359094105, "learning_rate": 1.6376068207449864e-05, "loss": 0.8587, "step": 2140 }, { "epoch": 0.301655512504403, "grad_norm": 0.9062925309138763, "learning_rate": 1.6372551908140786e-05, "loss": 0.926, "step": 2141 }, { "epoch": 0.3017964071856287, "grad_norm": 1.4230235159429345, "learning_rate": 1.636903428164691e-05, "loss": 0.5068, "step": 2142 }, { "epoch": 0.3019373018668545, "grad_norm": 1.4627256316001374, "learning_rate": 1.636551532870085e-05, "loss": 0.6079, "step": 2143 }, { "epoch": 0.3020781965480803, "grad_norm": 1.1120687180005575, "learning_rate": 1.6361995050035476e-05, "loss": 0.9241, "step": 2144 }, { "epoch": 0.30221909122930607, "grad_norm": 1.6325489344922672, "learning_rate": 1.6358473446383943e-05, "loss": 0.5369, "step": 2145 }, { "epoch": 0.30235998591053187, "grad_norm": 1.3897328498334516, "learning_rate": 1.6354950518479684e-05, "loss": 0.5312, "step": 2146 }, { "epoch": 0.30250088059175767, "grad_norm": 1.246109787380331, "learning_rate": 1.6351426267056402e-05, "loss": 0.4446, "step": 2147 }, { "epoch": 0.30264177527298347, "grad_norm": 1.1045085907572632, "learning_rate": 1.634790069284807e-05, "loss": 0.9628, "step": 2148 }, { "epoch": 0.3027826699542092, "grad_norm": 1.0206189482621264, "learning_rate": 1.6344373796588962e-05, "loss": 0.8939, "step": 2149 }, { "epoch": 0.302923564635435, "grad_norm": 1.1843580868900674, "learning_rate": 1.6340845579013597e-05, "loss": 0.4787, "step": 2150 }, { "epoch": 0.3030644593166608, "grad_norm": 1.350097429479203, "learning_rate": 1.6337316040856784e-05, "loss": 0.7098, "step": 2151 }, { "epoch": 0.30320535399788656, "grad_norm": 0.992988970154939, "learning_rate": 1.633378518285361e-05, "loss": 0.9031, "step": 2152 }, { "epoch": 0.30334624867911236, "grad_norm": 1.0427962441206906, "learning_rate": 1.6330253005739424e-05, "loss": 0.9043, "step": 2153 }, { "epoch": 0.30348714336033816, "grad_norm": 1.162354184828311, "learning_rate": 1.6326719510249865e-05, "loss": 0.8832, "step": 2154 }, { "epoch": 0.3036280380415639, "grad_norm": 1.4608025321126226, "learning_rate": 1.632318469712084e-05, "loss": 0.6232, "step": 2155 }, { "epoch": 0.3037689327227897, "grad_norm": 1.0839378627926743, "learning_rate": 1.6319648567088524e-05, "loss": 0.966, "step": 2156 }, { "epoch": 0.3039098274040155, "grad_norm": 1.3740742647006505, "learning_rate": 1.6316111120889374e-05, "loss": 0.5986, "step": 2157 }, { "epoch": 0.3040507220852413, "grad_norm": 1.078345868196605, "learning_rate": 1.6312572359260124e-05, "loss": 0.9116, "step": 2158 }, { "epoch": 0.30419161676646705, "grad_norm": 1.2430077272092226, "learning_rate": 1.630903228293777e-05, "loss": 0.9358, "step": 2159 }, { "epoch": 0.30433251144769286, "grad_norm": 1.1683075479195775, "learning_rate": 1.630549089265959e-05, "loss": 0.8964, "step": 2160 }, { "epoch": 0.30447340612891866, "grad_norm": 0.9949155574236354, "learning_rate": 1.630194818916314e-05, "loss": 0.8845, "step": 2161 }, { "epoch": 0.3046143008101444, "grad_norm": 1.0995037217837553, "learning_rate": 1.629840417318624e-05, "loss": 0.9259, "step": 2162 }, { "epoch": 0.3047551954913702, "grad_norm": 1.011922442424279, "learning_rate": 1.6294858845466987e-05, "loss": 0.9072, "step": 2163 }, { "epoch": 0.304896090172596, "grad_norm": 1.3150637985359424, "learning_rate": 1.6291312206743755e-05, "loss": 0.6846, "step": 2164 }, { "epoch": 0.30503698485382175, "grad_norm": 1.237218543357218, "learning_rate": 1.6287764257755184e-05, "loss": 0.9629, "step": 2165 }, { "epoch": 0.30517787953504755, "grad_norm": 1.0043153319387796, "learning_rate": 1.6284214999240195e-05, "loss": 0.8804, "step": 2166 }, { "epoch": 0.30531877421627335, "grad_norm": 1.0181715530576234, "learning_rate": 1.628066443193797e-05, "loss": 0.8735, "step": 2167 }, { "epoch": 0.3054596688974991, "grad_norm": 1.0593429819541202, "learning_rate": 1.6277112556587975e-05, "loss": 0.9049, "step": 2168 }, { "epoch": 0.3056005635787249, "grad_norm": 1.0303176653627995, "learning_rate": 1.6273559373929946e-05, "loss": 0.9071, "step": 2169 }, { "epoch": 0.3057414582599507, "grad_norm": 0.9358819605158261, "learning_rate": 1.6270004884703887e-05, "loss": 0.8903, "step": 2170 }, { "epoch": 0.3058823529411765, "grad_norm": 0.9938217867536909, "learning_rate": 1.6266449089650077e-05, "loss": 0.8999, "step": 2171 }, { "epoch": 0.30602324762240224, "grad_norm": 1.3248453521137973, "learning_rate": 1.6262891989509068e-05, "loss": 0.5063, "step": 2172 }, { "epoch": 0.30616414230362804, "grad_norm": 1.270334984056334, "learning_rate": 1.6259333585021678e-05, "loss": 0.5575, "step": 2173 }, { "epoch": 0.30630503698485384, "grad_norm": 1.1598975796082005, "learning_rate": 1.625577387692901e-05, "loss": 0.9103, "step": 2174 }, { "epoch": 0.3064459316660796, "grad_norm": 1.0522880425183283, "learning_rate": 1.6252212865972417e-05, "loss": 0.8261, "step": 2175 }, { "epoch": 0.3065868263473054, "grad_norm": 1.0249758062204377, "learning_rate": 1.624865055289355e-05, "loss": 0.8941, "step": 2176 }, { "epoch": 0.3067277210285312, "grad_norm": 1.0687060555419294, "learning_rate": 1.6245086938434307e-05, "loss": 0.9292, "step": 2177 }, { "epoch": 0.30686861570975693, "grad_norm": 1.1304454868422495, "learning_rate": 1.624152202333687e-05, "loss": 0.9498, "step": 2178 }, { "epoch": 0.30700951039098273, "grad_norm": 1.3223855593001281, "learning_rate": 1.6237955808343687e-05, "loss": 0.5399, "step": 2179 }, { "epoch": 0.30715040507220853, "grad_norm": 1.2219397245814059, "learning_rate": 1.6234388294197482e-05, "loss": 0.4682, "step": 2180 }, { "epoch": 0.30729129975343433, "grad_norm": 1.032290408435284, "learning_rate": 1.6230819481641246e-05, "loss": 0.8645, "step": 2181 }, { "epoch": 0.3074321944346601, "grad_norm": 1.2273279289560417, "learning_rate": 1.6227249371418242e-05, "loss": 0.5544, "step": 2182 }, { "epoch": 0.3075730891158859, "grad_norm": 0.9876355111090414, "learning_rate": 1.6223677964271996e-05, "loss": 0.8974, "step": 2183 }, { "epoch": 0.3077139837971117, "grad_norm": 1.2772647220644127, "learning_rate": 1.6220105260946315e-05, "loss": 0.5283, "step": 2184 }, { "epoch": 0.3078548784783374, "grad_norm": 1.1712688819743426, "learning_rate": 1.621653126218527e-05, "loss": 0.9674, "step": 2185 }, { "epoch": 0.3079957731595632, "grad_norm": 1.013753357829738, "learning_rate": 1.6212955968733204e-05, "loss": 0.8324, "step": 2186 }, { "epoch": 0.308136667840789, "grad_norm": 1.3199130594905013, "learning_rate": 1.6209379381334724e-05, "loss": 0.6227, "step": 2187 }, { "epoch": 0.30827756252201477, "grad_norm": 0.9553630079813076, "learning_rate": 1.6205801500734717e-05, "loss": 0.9176, "step": 2188 }, { "epoch": 0.30841845720324057, "grad_norm": 1.3467822690400422, "learning_rate": 1.6202222327678327e-05, "loss": 0.5106, "step": 2189 }, { "epoch": 0.30855935188446637, "grad_norm": 1.3850099812883199, "learning_rate": 1.619864186291098e-05, "loss": 0.4982, "step": 2190 }, { "epoch": 0.30870024656569217, "grad_norm": 0.9566904225170101, "learning_rate": 1.619506010717836e-05, "loss": 0.9158, "step": 2191 }, { "epoch": 0.3088411412469179, "grad_norm": 1.008842337796952, "learning_rate": 1.6191477061226428e-05, "loss": 0.921, "step": 2192 }, { "epoch": 0.3089820359281437, "grad_norm": 1.0639975106291455, "learning_rate": 1.61878927258014e-05, "loss": 0.8365, "step": 2193 }, { "epoch": 0.3091229306093695, "grad_norm": 1.0898511440548824, "learning_rate": 1.6184307101649784e-05, "loss": 0.877, "step": 2194 }, { "epoch": 0.30926382529059526, "grad_norm": 1.0958818298176034, "learning_rate": 1.6180720189518334e-05, "loss": 0.9203, "step": 2195 }, { "epoch": 0.30940471997182106, "grad_norm": 1.0109973939242125, "learning_rate": 1.6177131990154088e-05, "loss": 0.9506, "step": 2196 }, { "epoch": 0.30954561465304686, "grad_norm": 1.0235561259309904, "learning_rate": 1.6173542504304337e-05, "loss": 0.8797, "step": 2197 }, { "epoch": 0.3096865093342726, "grad_norm": 1.1399177690110525, "learning_rate": 1.6169951732716658e-05, "loss": 0.9333, "step": 2198 }, { "epoch": 0.3098274040154984, "grad_norm": 1.2843068509675184, "learning_rate": 1.616635967613888e-05, "loss": 0.6238, "step": 2199 }, { "epoch": 0.3099682986967242, "grad_norm": 1.5128889879847291, "learning_rate": 1.61627663353191e-05, "loss": 0.521, "step": 2200 }, { "epoch": 0.31010919337795, "grad_norm": 1.0880293603651827, "learning_rate": 1.61591717110057e-05, "loss": 0.9029, "step": 2201 }, { "epoch": 0.31025008805917575, "grad_norm": 0.9657644457279022, "learning_rate": 1.615557580394731e-05, "loss": 0.8526, "step": 2202 }, { "epoch": 0.31039098274040156, "grad_norm": 1.2359852743402326, "learning_rate": 1.6151978614892838e-05, "loss": 0.5326, "step": 2203 }, { "epoch": 0.31053187742162736, "grad_norm": 1.0429139580072957, "learning_rate": 1.6148380144591453e-05, "loss": 0.8713, "step": 2204 }, { "epoch": 0.3106727721028531, "grad_norm": 1.0211917142791331, "learning_rate": 1.6144780393792597e-05, "loss": 0.8848, "step": 2205 }, { "epoch": 0.3108136667840789, "grad_norm": 1.0747854259120222, "learning_rate": 1.614117936324597e-05, "loss": 0.8941, "step": 2206 }, { "epoch": 0.3109545614653047, "grad_norm": 1.040941258672908, "learning_rate": 1.6137577053701547e-05, "loss": 0.834, "step": 2207 }, { "epoch": 0.31109545614653045, "grad_norm": 1.0142780453097509, "learning_rate": 1.6133973465909566e-05, "loss": 0.9065, "step": 2208 }, { "epoch": 0.31123635082775625, "grad_norm": 1.1585355022345634, "learning_rate": 1.613036860062053e-05, "loss": 0.4506, "step": 2209 }, { "epoch": 0.31137724550898205, "grad_norm": 1.2964939693206272, "learning_rate": 1.6126762458585205e-05, "loss": 0.6071, "step": 2210 }, { "epoch": 0.3115181401902078, "grad_norm": 1.1263194752493384, "learning_rate": 1.6123155040554635e-05, "loss": 0.9222, "step": 2211 }, { "epoch": 0.3116590348714336, "grad_norm": 1.0680671338177445, "learning_rate": 1.611954634728012e-05, "loss": 0.9479, "step": 2212 }, { "epoch": 0.3117999295526594, "grad_norm": 1.0152532001425076, "learning_rate": 1.6115936379513225e-05, "loss": 0.947, "step": 2213 }, { "epoch": 0.3119408242338852, "grad_norm": 1.0431089886745208, "learning_rate": 1.6112325138005784e-05, "loss": 0.9955, "step": 2214 }, { "epoch": 0.31208171891511094, "grad_norm": 1.0385955455003262, "learning_rate": 1.6108712623509894e-05, "loss": 0.9424, "step": 2215 }, { "epoch": 0.31222261359633674, "grad_norm": 1.2876408726535513, "learning_rate": 1.610509883677792e-05, "loss": 0.5115, "step": 2216 }, { "epoch": 0.31236350827756254, "grad_norm": 1.070035353303837, "learning_rate": 1.6101483778562492e-05, "loss": 0.9044, "step": 2217 }, { "epoch": 0.3125044029587883, "grad_norm": 1.3335262731196915, "learning_rate": 1.6097867449616496e-05, "loss": 0.5063, "step": 2218 }, { "epoch": 0.3126452976400141, "grad_norm": 1.0058449952823059, "learning_rate": 1.60942498506931e-05, "loss": 0.8919, "step": 2219 }, { "epoch": 0.3127861923212399, "grad_norm": 1.2150523268112012, "learning_rate": 1.609063098254572e-05, "loss": 0.9313, "step": 2220 }, { "epoch": 0.31292708700246563, "grad_norm": 1.052829093224807, "learning_rate": 1.6087010845928038e-05, "loss": 0.8774, "step": 2221 }, { "epoch": 0.31306798168369143, "grad_norm": 1.0514022257970854, "learning_rate": 1.6083389441594016e-05, "loss": 0.8892, "step": 2222 }, { "epoch": 0.31320887636491723, "grad_norm": 0.9730567179208749, "learning_rate": 1.607976677029786e-05, "loss": 0.9164, "step": 2223 }, { "epoch": 0.31334977104614303, "grad_norm": 0.9452680811525239, "learning_rate": 1.6076142832794054e-05, "loss": 0.9148, "step": 2224 }, { "epoch": 0.3134906657273688, "grad_norm": 1.0361660996532944, "learning_rate": 1.6072517629837337e-05, "loss": 0.8789, "step": 2225 }, { "epoch": 0.3136315604085946, "grad_norm": 0.9631766041895699, "learning_rate": 1.6068891162182713e-05, "loss": 0.9015, "step": 2226 }, { "epoch": 0.3137724550898204, "grad_norm": 0.9901359840708506, "learning_rate": 1.6065263430585457e-05, "loss": 0.9306, "step": 2227 }, { "epoch": 0.3139133497710461, "grad_norm": 1.0909609938335298, "learning_rate": 1.60616344358011e-05, "loss": 0.8865, "step": 2228 }, { "epoch": 0.3140542444522719, "grad_norm": 0.9735806823512635, "learning_rate": 1.6058004178585434e-05, "loss": 0.8641, "step": 2229 }, { "epoch": 0.3141951391334977, "grad_norm": 0.9308913610150996, "learning_rate": 1.6054372659694518e-05, "loss": 0.8881, "step": 2230 }, { "epoch": 0.31433603381472347, "grad_norm": 1.3772693688558026, "learning_rate": 1.605073987988468e-05, "loss": 0.6621, "step": 2231 }, { "epoch": 0.31447692849594927, "grad_norm": 1.28871532714446, "learning_rate": 1.6047105839912495e-05, "loss": 0.5083, "step": 2232 }, { "epoch": 0.31461782317717507, "grad_norm": 1.1278985125422891, "learning_rate": 1.604347054053481e-05, "loss": 0.9618, "step": 2233 }, { "epoch": 0.31475871785840087, "grad_norm": 1.147933974127194, "learning_rate": 1.6039833982508744e-05, "loss": 0.9399, "step": 2234 }, { "epoch": 0.3148996125396266, "grad_norm": 1.2071786643997504, "learning_rate": 1.6036196166591658e-05, "loss": 0.89, "step": 2235 }, { "epoch": 0.3150405072208524, "grad_norm": 1.3612301041496693, "learning_rate": 1.6032557093541184e-05, "loss": 0.626, "step": 2236 }, { "epoch": 0.3151814019020782, "grad_norm": 1.416507569581499, "learning_rate": 1.6028916764115224e-05, "loss": 0.561, "step": 2237 }, { "epoch": 0.31532229658330396, "grad_norm": 1.2562590343193865, "learning_rate": 1.602527517907193e-05, "loss": 0.8846, "step": 2238 }, { "epoch": 0.31546319126452976, "grad_norm": 1.2846727648136143, "learning_rate": 1.6021632339169715e-05, "loss": 0.4995, "step": 2239 }, { "epoch": 0.31560408594575556, "grad_norm": 1.1381893850630052, "learning_rate": 1.601798824516727e-05, "loss": 0.8949, "step": 2240 }, { "epoch": 0.3157449806269813, "grad_norm": 1.0173542628565602, "learning_rate": 1.601434289782352e-05, "loss": 0.9032, "step": 2241 }, { "epoch": 0.3158858753082071, "grad_norm": 1.248685930349234, "learning_rate": 1.6010696297897684e-05, "loss": 0.5014, "step": 2242 }, { "epoch": 0.3160267699894329, "grad_norm": 0.9548644547969821, "learning_rate": 1.600704844614921e-05, "loss": 0.9224, "step": 2243 }, { "epoch": 0.3161676646706587, "grad_norm": 1.02684748096887, "learning_rate": 1.6003399343337823e-05, "loss": 0.8607, "step": 2244 }, { "epoch": 0.31630855935188446, "grad_norm": 1.0421342740411956, "learning_rate": 1.5999748990223515e-05, "loss": 0.9009, "step": 2245 }, { "epoch": 0.31644945403311026, "grad_norm": 0.9552113546308189, "learning_rate": 1.5996097387566518e-05, "loss": 0.9433, "step": 2246 }, { "epoch": 0.31659034871433606, "grad_norm": 1.0525570596040974, "learning_rate": 1.5992444536127342e-05, "loss": 0.967, "step": 2247 }, { "epoch": 0.3167312433955618, "grad_norm": 0.9903558529015161, "learning_rate": 1.5988790436666756e-05, "loss": 0.91, "step": 2248 }, { "epoch": 0.3168721380767876, "grad_norm": 0.9988492080408031, "learning_rate": 1.5985135089945774e-05, "loss": 0.9163, "step": 2249 }, { "epoch": 0.3170130327580134, "grad_norm": 1.6042089396017105, "learning_rate": 1.5981478496725683e-05, "loss": 0.9019, "step": 2250 }, { "epoch": 0.31715392743923915, "grad_norm": 0.9278094334221443, "learning_rate": 1.5977820657768032e-05, "loss": 0.9215, "step": 2251 }, { "epoch": 0.31729482212046495, "grad_norm": 1.0792076797963983, "learning_rate": 1.597416157383462e-05, "loss": 0.8752, "step": 2252 }, { "epoch": 0.31743571680169075, "grad_norm": 1.3962351539628635, "learning_rate": 1.597050124568751e-05, "loss": 0.5826, "step": 2253 }, { "epoch": 0.3175766114829165, "grad_norm": 0.9770036957486247, "learning_rate": 1.5966839674089017e-05, "loss": 0.8661, "step": 2254 }, { "epoch": 0.3177175061641423, "grad_norm": 1.0217918163235757, "learning_rate": 1.5963176859801733e-05, "loss": 0.9107, "step": 2255 }, { "epoch": 0.3178584008453681, "grad_norm": 0.9607059947879198, "learning_rate": 1.5959512803588487e-05, "loss": 0.8945, "step": 2256 }, { "epoch": 0.3179992955265939, "grad_norm": 1.092616447163984, "learning_rate": 1.5955847506212384e-05, "loss": 0.9626, "step": 2257 }, { "epoch": 0.31814019020781964, "grad_norm": 1.267816207420037, "learning_rate": 1.595218096843677e-05, "loss": 0.5002, "step": 2258 }, { "epoch": 0.31828108488904544, "grad_norm": 0.9608069176304967, "learning_rate": 1.594851319102527e-05, "loss": 0.943, "step": 2259 }, { "epoch": 0.31842197957027124, "grad_norm": 0.9935873862386496, "learning_rate": 1.5944844174741752e-05, "loss": 0.9023, "step": 2260 }, { "epoch": 0.318562874251497, "grad_norm": 1.0839976834231295, "learning_rate": 1.594117392035035e-05, "loss": 0.9096, "step": 2261 }, { "epoch": 0.3187037689327228, "grad_norm": 1.3792744341687113, "learning_rate": 1.593750242861545e-05, "loss": 0.5573, "step": 2262 }, { "epoch": 0.3188446636139486, "grad_norm": 0.9339924198369102, "learning_rate": 1.5933829700301693e-05, "loss": 0.8818, "step": 2263 }, { "epoch": 0.31898555829517433, "grad_norm": 0.9144434910527895, "learning_rate": 1.5930155736173992e-05, "loss": 0.8854, "step": 2264 }, { "epoch": 0.31912645297640013, "grad_norm": 0.9835595423257554, "learning_rate": 1.5926480536997502e-05, "loss": 0.8923, "step": 2265 }, { "epoch": 0.31926734765762593, "grad_norm": 1.0528047007168633, "learning_rate": 1.5922804103537645e-05, "loss": 0.934, "step": 2266 }, { "epoch": 0.31940824233885173, "grad_norm": 0.9746916305445668, "learning_rate": 1.5919126436560096e-05, "loss": 0.8999, "step": 2267 }, { "epoch": 0.3195491370200775, "grad_norm": 1.1152555730701277, "learning_rate": 1.5915447536830785e-05, "loss": 0.9127, "step": 2268 }, { "epoch": 0.3196900317013033, "grad_norm": 1.4662683562202345, "learning_rate": 1.5911767405115905e-05, "loss": 0.5515, "step": 2269 }, { "epoch": 0.3198309263825291, "grad_norm": 1.315667041091196, "learning_rate": 1.5908086042181898e-05, "loss": 0.4813, "step": 2270 }, { "epoch": 0.3199718210637548, "grad_norm": 1.0416579297415933, "learning_rate": 1.5904403448795465e-05, "loss": 0.9145, "step": 2271 }, { "epoch": 0.3201127157449806, "grad_norm": 1.0640665071032724, "learning_rate": 1.5900719625723567e-05, "loss": 0.9359, "step": 2272 }, { "epoch": 0.3202536104262064, "grad_norm": 0.9819920045113538, "learning_rate": 1.589703457373342e-05, "loss": 0.9416, "step": 2273 }, { "epoch": 0.32039450510743217, "grad_norm": 1.3169155062132725, "learning_rate": 1.5893348293592496e-05, "loss": 0.5066, "step": 2274 }, { "epoch": 0.32053539978865797, "grad_norm": 1.028530372155372, "learning_rate": 1.5889660786068512e-05, "loss": 0.9062, "step": 2275 }, { "epoch": 0.32067629446988377, "grad_norm": 1.5944034583886664, "learning_rate": 1.588597205192946e-05, "loss": 0.535, "step": 2276 }, { "epoch": 0.32081718915110957, "grad_norm": 1.337557684289829, "learning_rate": 1.5882282091943566e-05, "loss": 0.4534, "step": 2277 }, { "epoch": 0.3209580838323353, "grad_norm": 1.099162930613211, "learning_rate": 1.5878590906879335e-05, "loss": 0.948, "step": 2278 }, { "epoch": 0.3210989785135611, "grad_norm": 1.3823978600006972, "learning_rate": 1.5874898497505507e-05, "loss": 0.5474, "step": 2279 }, { "epoch": 0.3212398731947869, "grad_norm": 1.0398834154521033, "learning_rate": 1.587120486459109e-05, "loss": 0.9467, "step": 2280 }, { "epoch": 0.32138076787601266, "grad_norm": 1.5605229592397039, "learning_rate": 1.5867510008905333e-05, "loss": 0.6504, "step": 2281 }, { "epoch": 0.32152166255723846, "grad_norm": 1.0905868889798862, "learning_rate": 1.5863813931217754e-05, "loss": 0.9299, "step": 2282 }, { "epoch": 0.32166255723846426, "grad_norm": 1.009078265101944, "learning_rate": 1.5860116632298123e-05, "loss": 0.9003, "step": 2283 }, { "epoch": 0.32180345191969, "grad_norm": 1.0725360718656876, "learning_rate": 1.5856418112916457e-05, "loss": 0.928, "step": 2284 }, { "epoch": 0.3219443466009158, "grad_norm": 1.1692359073721614, "learning_rate": 1.585271837384303e-05, "loss": 0.9724, "step": 2285 }, { "epoch": 0.3220852412821416, "grad_norm": 1.1935556269193484, "learning_rate": 1.5849017415848372e-05, "loss": 0.5586, "step": 2286 }, { "epoch": 0.3222261359633674, "grad_norm": 0.9575475102024023, "learning_rate": 1.5845315239703267e-05, "loss": 0.9014, "step": 2287 }, { "epoch": 0.32236703064459316, "grad_norm": 1.30623632501876, "learning_rate": 1.584161184617875e-05, "loss": 0.5115, "step": 2288 }, { "epoch": 0.32250792532581896, "grad_norm": 0.9316358983578439, "learning_rate": 1.5837907236046116e-05, "loss": 0.9426, "step": 2289 }, { "epoch": 0.32264882000704476, "grad_norm": 0.9592520289321662, "learning_rate": 1.58342014100769e-05, "loss": 0.9273, "step": 2290 }, { "epoch": 0.3227897146882705, "grad_norm": 1.093227443517267, "learning_rate": 1.583049436904291e-05, "loss": 0.8782, "step": 2291 }, { "epoch": 0.3229306093694963, "grad_norm": 1.00214975978916, "learning_rate": 1.582678611371619e-05, "loss": 0.9314, "step": 2292 }, { "epoch": 0.3230715040507221, "grad_norm": 1.2265169139777417, "learning_rate": 1.582307664486904e-05, "loss": 0.5138, "step": 2293 }, { "epoch": 0.32321239873194785, "grad_norm": 1.1636678734722365, "learning_rate": 1.5819365963274017e-05, "loss": 0.9023, "step": 2294 }, { "epoch": 0.32335329341317365, "grad_norm": 1.0075570555873228, "learning_rate": 1.5815654069703933e-05, "loss": 0.9197, "step": 2295 }, { "epoch": 0.32349418809439945, "grad_norm": 1.2849848969736881, "learning_rate": 1.5811940964931852e-05, "loss": 0.5371, "step": 2296 }, { "epoch": 0.3236350827756252, "grad_norm": 1.410212399339928, "learning_rate": 1.5808226649731073e-05, "loss": 0.5129, "step": 2297 }, { "epoch": 0.323775977456851, "grad_norm": 1.1580284336417426, "learning_rate": 1.580451112487517e-05, "loss": 0.8735, "step": 2298 }, { "epoch": 0.3239168721380768, "grad_norm": 1.1260263458206725, "learning_rate": 1.5800794391137965e-05, "loss": 0.9602, "step": 2299 }, { "epoch": 0.3240577668193026, "grad_norm": 1.3830849859351875, "learning_rate": 1.579707644929352e-05, "loss": 0.6534, "step": 2300 }, { "epoch": 0.32419866150052834, "grad_norm": 1.1823476340841081, "learning_rate": 1.5793357300116153e-05, "loss": 0.4271, "step": 2301 }, { "epoch": 0.32433955618175414, "grad_norm": 0.9237030852417383, "learning_rate": 1.578963694438044e-05, "loss": 0.9253, "step": 2302 }, { "epoch": 0.32448045086297994, "grad_norm": 0.9277401412766523, "learning_rate": 1.5785915382861198e-05, "loss": 0.8956, "step": 2303 }, { "epoch": 0.3246213455442057, "grad_norm": 1.0191380827488568, "learning_rate": 1.578219261633351e-05, "loss": 0.8912, "step": 2304 }, { "epoch": 0.3247622402254315, "grad_norm": 0.9553557607496179, "learning_rate": 1.5778468645572697e-05, "loss": 0.8777, "step": 2305 }, { "epoch": 0.3249031349066573, "grad_norm": 1.0846534959707348, "learning_rate": 1.5774743471354334e-05, "loss": 0.9588, "step": 2306 }, { "epoch": 0.32504402958788303, "grad_norm": 1.0026078902076072, "learning_rate": 1.5771017094454245e-05, "loss": 0.9095, "step": 2307 }, { "epoch": 0.32518492426910883, "grad_norm": 1.3383299196691045, "learning_rate": 1.5767289515648516e-05, "loss": 0.6321, "step": 2308 }, { "epoch": 0.32532581895033463, "grad_norm": 1.2789004509670436, "learning_rate": 1.5763560735713465e-05, "loss": 0.5399, "step": 2309 }, { "epoch": 0.32546671363156043, "grad_norm": 1.420693502432982, "learning_rate": 1.5759830755425673e-05, "loss": 0.5081, "step": 2310 }, { "epoch": 0.3256076083127862, "grad_norm": 1.2057348400028276, "learning_rate": 1.5756099575561968e-05, "loss": 0.5279, "step": 2311 }, { "epoch": 0.325748502994012, "grad_norm": 0.9634110897500611, "learning_rate": 1.5752367196899424e-05, "loss": 0.9011, "step": 2312 }, { "epoch": 0.3258893976752378, "grad_norm": 1.0771522870519794, "learning_rate": 1.574863362021537e-05, "loss": 0.969, "step": 2313 }, { "epoch": 0.3260302923564635, "grad_norm": 0.9537425084812375, "learning_rate": 1.5744898846287386e-05, "loss": 0.8634, "step": 2314 }, { "epoch": 0.3261711870376893, "grad_norm": 1.3674854947386434, "learning_rate": 1.5741162875893295e-05, "loss": 0.5964, "step": 2315 }, { "epoch": 0.3263120817189151, "grad_norm": 0.9714441469597648, "learning_rate": 1.5737425709811168e-05, "loss": 0.8665, "step": 2316 }, { "epoch": 0.32645297640014087, "grad_norm": 0.9971323791427974, "learning_rate": 1.5733687348819334e-05, "loss": 0.8311, "step": 2317 }, { "epoch": 0.32659387108136667, "grad_norm": 1.0576430430803203, "learning_rate": 1.5729947793696365e-05, "loss": 0.9237, "step": 2318 }, { "epoch": 0.32673476576259247, "grad_norm": 1.2678393781629536, "learning_rate": 1.572620704522108e-05, "loss": 0.8906, "step": 2319 }, { "epoch": 0.32687566044381827, "grad_norm": 1.1717014205034877, "learning_rate": 1.5722465104172555e-05, "loss": 0.9195, "step": 2320 }, { "epoch": 0.327016555125044, "grad_norm": 0.9015989149515895, "learning_rate": 1.57187219713301e-05, "loss": 0.8863, "step": 2321 }, { "epoch": 0.3271574498062698, "grad_norm": 1.2497597994884488, "learning_rate": 1.5714977647473288e-05, "loss": 0.5603, "step": 2322 }, { "epoch": 0.3272983444874956, "grad_norm": 1.296157441944533, "learning_rate": 1.5711232133381933e-05, "loss": 0.5001, "step": 2323 }, { "epoch": 0.32743923916872136, "grad_norm": 1.0840328509009731, "learning_rate": 1.5707485429836096e-05, "loss": 0.8451, "step": 2324 }, { "epoch": 0.32758013384994716, "grad_norm": 1.0506885918976243, "learning_rate": 1.5703737537616085e-05, "loss": 0.9374, "step": 2325 }, { "epoch": 0.32772102853117296, "grad_norm": 1.0236297713078375, "learning_rate": 1.5699988457502464e-05, "loss": 0.9171, "step": 2326 }, { "epoch": 0.3278619232123987, "grad_norm": 1.3031543809934831, "learning_rate": 1.5696238190276032e-05, "loss": 0.603, "step": 2327 }, { "epoch": 0.3280028178936245, "grad_norm": 1.2272678607462948, "learning_rate": 1.5692486736717847e-05, "loss": 0.4966, "step": 2328 }, { "epoch": 0.3281437125748503, "grad_norm": 1.0904942378832116, "learning_rate": 1.5688734097609204e-05, "loss": 0.9599, "step": 2329 }, { "epoch": 0.3282846072560761, "grad_norm": 1.0851738982303571, "learning_rate": 1.568498027373165e-05, "loss": 0.8978, "step": 2330 }, { "epoch": 0.32842550193730186, "grad_norm": 1.2109178295383096, "learning_rate": 1.5681225265866983e-05, "loss": 0.4886, "step": 2331 }, { "epoch": 0.32856639661852766, "grad_norm": 1.5903314286097352, "learning_rate": 1.5677469074797236e-05, "loss": 0.5982, "step": 2332 }, { "epoch": 0.32870729129975346, "grad_norm": 1.0048796770474142, "learning_rate": 1.5673711701304705e-05, "loss": 0.9579, "step": 2333 }, { "epoch": 0.3288481859809792, "grad_norm": 1.035701914357149, "learning_rate": 1.566995314617191e-05, "loss": 0.8977, "step": 2334 }, { "epoch": 0.328989080662205, "grad_norm": 0.8833227744122708, "learning_rate": 1.5666193410181637e-05, "loss": 0.9048, "step": 2335 }, { "epoch": 0.3291299753434308, "grad_norm": 1.0617758580269094, "learning_rate": 1.566243249411691e-05, "loss": 0.9173, "step": 2336 }, { "epoch": 0.32927087002465655, "grad_norm": 1.0729314994877681, "learning_rate": 1.5658670398761e-05, "loss": 0.9081, "step": 2337 }, { "epoch": 0.32941176470588235, "grad_norm": 1.3593999576004547, "learning_rate": 1.565490712489742e-05, "loss": 0.5166, "step": 2338 }, { "epoch": 0.32955265938710815, "grad_norm": 1.2445917847237284, "learning_rate": 1.5651142673309934e-05, "loss": 0.4805, "step": 2339 }, { "epoch": 0.3296935540683339, "grad_norm": 1.0410371392934836, "learning_rate": 1.5647377044782546e-05, "loss": 0.8914, "step": 2340 }, { "epoch": 0.3298344487495597, "grad_norm": 1.0320148408904914, "learning_rate": 1.564361024009951e-05, "loss": 0.9137, "step": 2341 }, { "epoch": 0.3299753434307855, "grad_norm": 1.268145780930982, "learning_rate": 1.5639842260045322e-05, "loss": 0.5621, "step": 2342 }, { "epoch": 0.3301162381120113, "grad_norm": 1.0461355370728262, "learning_rate": 1.563607310540472e-05, "loss": 0.9022, "step": 2343 }, { "epoch": 0.33025713279323704, "grad_norm": 1.3365226502511325, "learning_rate": 1.5632302776962697e-05, "loss": 0.589, "step": 2344 }, { "epoch": 0.33039802747446284, "grad_norm": 0.9931973046390642, "learning_rate": 1.562853127550448e-05, "loss": 0.8828, "step": 2345 }, { "epoch": 0.33053892215568864, "grad_norm": 0.9006784194296306, "learning_rate": 1.5624758601815542e-05, "loss": 0.8486, "step": 2346 }, { "epoch": 0.3306798168369144, "grad_norm": 0.8974033239053687, "learning_rate": 1.5620984756681608e-05, "loss": 0.9022, "step": 2347 }, { "epoch": 0.3308207115181402, "grad_norm": 0.9883222971762784, "learning_rate": 1.561720974088863e-05, "loss": 0.8779, "step": 2348 }, { "epoch": 0.330961606199366, "grad_norm": 1.0086163873170526, "learning_rate": 1.5613433555222828e-05, "loss": 0.9243, "step": 2349 }, { "epoch": 0.33110250088059173, "grad_norm": 1.6462004413144162, "learning_rate": 1.5609656200470646e-05, "loss": 0.7118, "step": 2350 }, { "epoch": 0.33124339556181753, "grad_norm": 1.0530630513966162, "learning_rate": 1.5605877677418773e-05, "loss": 0.9179, "step": 2351 }, { "epoch": 0.33138429024304333, "grad_norm": 1.3672683027877448, "learning_rate": 1.5602097986854152e-05, "loss": 0.5103, "step": 2352 }, { "epoch": 0.33152518492426913, "grad_norm": 1.0229164388171326, "learning_rate": 1.5598317129563968e-05, "loss": 0.927, "step": 2353 }, { "epoch": 0.3316660796054949, "grad_norm": 1.4584992181406924, "learning_rate": 1.5594535106335632e-05, "loss": 0.4963, "step": 2354 }, { "epoch": 0.3318069742867207, "grad_norm": 1.1475822547101517, "learning_rate": 1.559075191795682e-05, "loss": 0.9417, "step": 2355 }, { "epoch": 0.3319478689679465, "grad_norm": 1.376697640279493, "learning_rate": 1.558696756521544e-05, "loss": 0.537, "step": 2356 }, { "epoch": 0.3320887636491722, "grad_norm": 1.4366440506278817, "learning_rate": 1.558318204889964e-05, "loss": 0.5938, "step": 2357 }, { "epoch": 0.332229658330398, "grad_norm": 0.9584256644579306, "learning_rate": 1.5579395369797815e-05, "loss": 0.8349, "step": 2358 }, { "epoch": 0.3323705530116238, "grad_norm": 1.309985859811954, "learning_rate": 1.5575607528698605e-05, "loss": 0.4995, "step": 2359 }, { "epoch": 0.33251144769284957, "grad_norm": 1.0243477137648205, "learning_rate": 1.557181852639088e-05, "loss": 0.8776, "step": 2360 }, { "epoch": 0.33265234237407537, "grad_norm": 1.0416148560435545, "learning_rate": 1.556802836366376e-05, "loss": 0.89, "step": 2361 }, { "epoch": 0.33279323705530117, "grad_norm": 1.6199323033219175, "learning_rate": 1.5564237041306616e-05, "loss": 0.5457, "step": 2362 }, { "epoch": 0.332934131736527, "grad_norm": 0.8968803444884665, "learning_rate": 1.5560444560109042e-05, "loss": 0.8847, "step": 2363 }, { "epoch": 0.3330750264177527, "grad_norm": 1.037007688932711, "learning_rate": 1.5556650920860884e-05, "loss": 0.8596, "step": 2364 }, { "epoch": 0.3332159210989785, "grad_norm": 0.95716465103336, "learning_rate": 1.555285612435223e-05, "loss": 0.9018, "step": 2365 }, { "epoch": 0.3333568157802043, "grad_norm": 0.9702680045629775, "learning_rate": 1.5549060171373404e-05, "loss": 0.9184, "step": 2366 }, { "epoch": 0.33349771046143006, "grad_norm": 0.9859786961407595, "learning_rate": 1.5545263062714974e-05, "loss": 0.8775, "step": 2367 }, { "epoch": 0.33363860514265586, "grad_norm": 1.2792449471713585, "learning_rate": 1.5541464799167747e-05, "loss": 0.4759, "step": 2368 }, { "epoch": 0.33377949982388166, "grad_norm": 1.171461354776606, "learning_rate": 1.5537665381522775e-05, "loss": 0.5172, "step": 2369 }, { "epoch": 0.3339203945051074, "grad_norm": 1.0019098379083227, "learning_rate": 1.5533864810571345e-05, "loss": 0.9065, "step": 2370 }, { "epoch": 0.3340612891863332, "grad_norm": 1.0581261233942312, "learning_rate": 1.553006308710498e-05, "loss": 0.915, "step": 2371 }, { "epoch": 0.334202183867559, "grad_norm": 1.1290178476177373, "learning_rate": 1.552626021191546e-05, "loss": 0.8898, "step": 2372 }, { "epoch": 0.33434307854878476, "grad_norm": 1.1732378227376683, "learning_rate": 1.552245618579479e-05, "loss": 0.9173, "step": 2373 }, { "epoch": 0.33448397323001056, "grad_norm": 1.0473462752771867, "learning_rate": 1.5518651009535213e-05, "loss": 0.9049, "step": 2374 }, { "epoch": 0.33462486791123636, "grad_norm": 1.053816100345464, "learning_rate": 1.551484468392922e-05, "loss": 0.8492, "step": 2375 }, { "epoch": 0.33476576259246216, "grad_norm": 0.9986794120493522, "learning_rate": 1.5511037209769545e-05, "loss": 0.918, "step": 2376 }, { "epoch": 0.3349066572736879, "grad_norm": 1.0480704941196604, "learning_rate": 1.550722858784915e-05, "loss": 0.8963, "step": 2377 }, { "epoch": 0.3350475519549137, "grad_norm": 0.9741452177924877, "learning_rate": 1.5503418818961232e-05, "loss": 0.8545, "step": 2378 }, { "epoch": 0.3351884466361395, "grad_norm": 0.9603225054874693, "learning_rate": 1.5499607903899252e-05, "loss": 0.891, "step": 2379 }, { "epoch": 0.33532934131736525, "grad_norm": 1.2557138562063948, "learning_rate": 1.5495795843456884e-05, "loss": 0.5329, "step": 2380 }, { "epoch": 0.33547023599859105, "grad_norm": 1.144782164262143, "learning_rate": 1.5491982638428052e-05, "loss": 0.9296, "step": 2381 }, { "epoch": 0.33561113067981685, "grad_norm": 0.9974284101262766, "learning_rate": 1.548816828960691e-05, "loss": 0.9145, "step": 2382 }, { "epoch": 0.3357520253610426, "grad_norm": 1.0971816104656342, "learning_rate": 1.548435279778787e-05, "loss": 0.9255, "step": 2383 }, { "epoch": 0.3358929200422684, "grad_norm": 0.9465323083581181, "learning_rate": 1.5480536163765557e-05, "loss": 0.8485, "step": 2384 }, { "epoch": 0.3360338147234942, "grad_norm": 1.0617570462347474, "learning_rate": 1.5476718388334848e-05, "loss": 0.8748, "step": 2385 }, { "epoch": 0.33617470940472, "grad_norm": 1.067996750662063, "learning_rate": 1.5472899472290856e-05, "loss": 0.9556, "step": 2386 }, { "epoch": 0.33631560408594574, "grad_norm": 1.3838060480215613, "learning_rate": 1.5469079416428935e-05, "loss": 0.6288, "step": 2387 }, { "epoch": 0.33645649876717154, "grad_norm": 1.0669020815095047, "learning_rate": 1.5465258221544663e-05, "loss": 0.9219, "step": 2388 }, { "epoch": 0.33659739344839734, "grad_norm": 1.0731990604203354, "learning_rate": 1.5461435888433874e-05, "loss": 0.5004, "step": 2389 }, { "epoch": 0.3367382881296231, "grad_norm": 1.432382031144001, "learning_rate": 1.545761241789262e-05, "loss": 0.5212, "step": 2390 }, { "epoch": 0.3368791828108489, "grad_norm": 0.9505547184158578, "learning_rate": 1.545378781071721e-05, "loss": 0.8858, "step": 2391 }, { "epoch": 0.3370200774920747, "grad_norm": 1.0193268958122603, "learning_rate": 1.5449962067704172e-05, "loss": 0.9124, "step": 2392 }, { "epoch": 0.33716097217330043, "grad_norm": 1.2528754289105095, "learning_rate": 1.5446135189650274e-05, "loss": 0.5677, "step": 2393 }, { "epoch": 0.33730186685452623, "grad_norm": 1.0633847232215563, "learning_rate": 1.5442307177352533e-05, "loss": 0.9324, "step": 2394 }, { "epoch": 0.33744276153575203, "grad_norm": 0.9685924082599794, "learning_rate": 1.5438478031608188e-05, "loss": 0.8689, "step": 2395 }, { "epoch": 0.33758365621697783, "grad_norm": 0.9605054755910302, "learning_rate": 1.543464775321472e-05, "loss": 0.9052, "step": 2396 }, { "epoch": 0.3377245508982036, "grad_norm": 0.9991304228020949, "learning_rate": 1.5430816342969842e-05, "loss": 0.909, "step": 2397 }, { "epoch": 0.3378654455794294, "grad_norm": 1.3156722935441758, "learning_rate": 1.542698380167151e-05, "loss": 0.5661, "step": 2398 }, { "epoch": 0.3380063402606552, "grad_norm": 1.3226894958350497, "learning_rate": 1.5423150130117913e-05, "loss": 0.4947, "step": 2399 }, { "epoch": 0.3381472349418809, "grad_norm": 1.0541606267425252, "learning_rate": 1.5419315329107466e-05, "loss": 0.8903, "step": 2400 }, { "epoch": 0.3382881296231067, "grad_norm": 0.9240530089610172, "learning_rate": 1.5415479399438838e-05, "loss": 0.9004, "step": 2401 }, { "epoch": 0.3384290243043325, "grad_norm": 1.1243763167552823, "learning_rate": 1.541164234191091e-05, "loss": 0.8972, "step": 2402 }, { "epoch": 0.33856991898555827, "grad_norm": 1.4043525565499586, "learning_rate": 1.540780415732282e-05, "loss": 0.5025, "step": 2403 }, { "epoch": 0.33871081366678407, "grad_norm": 1.0971593965010964, "learning_rate": 1.5403964846473924e-05, "loss": 0.9088, "step": 2404 }, { "epoch": 0.3388517083480099, "grad_norm": 1.3460310589206694, "learning_rate": 1.5400124410163823e-05, "loss": 0.5406, "step": 2405 }, { "epoch": 0.3389926030292357, "grad_norm": 1.2637820422154495, "learning_rate": 1.5396282849192347e-05, "loss": 0.5898, "step": 2406 }, { "epoch": 0.3391334977104614, "grad_norm": 1.1716557806594363, "learning_rate": 1.5392440164359562e-05, "loss": 0.4502, "step": 2407 }, { "epoch": 0.3392743923916872, "grad_norm": 1.335213822629834, "learning_rate": 1.5388596356465772e-05, "loss": 0.6797, "step": 2408 }, { "epoch": 0.339415287072913, "grad_norm": 0.8870953454454525, "learning_rate": 1.5384751426311504e-05, "loss": 0.8908, "step": 2409 }, { "epoch": 0.33955618175413876, "grad_norm": 0.9535960006002598, "learning_rate": 1.538090537469753e-05, "loss": 0.8355, "step": 2410 }, { "epoch": 0.33969707643536456, "grad_norm": 1.1069138545516795, "learning_rate": 1.537705820242485e-05, "loss": 0.979, "step": 2411 }, { "epoch": 0.33983797111659036, "grad_norm": 0.9139429291322967, "learning_rate": 1.53732099102947e-05, "loss": 0.8669, "step": 2412 }, { "epoch": 0.3399788657978161, "grad_norm": 0.8755668011196128, "learning_rate": 1.536936049910854e-05, "loss": 0.9122, "step": 2413 }, { "epoch": 0.3401197604790419, "grad_norm": 1.0386875552598704, "learning_rate": 1.536550996966808e-05, "loss": 0.8862, "step": 2414 }, { "epoch": 0.3402606551602677, "grad_norm": 1.0584660527525322, "learning_rate": 1.536165832277525e-05, "loss": 0.8778, "step": 2415 }, { "epoch": 0.34040154984149346, "grad_norm": 1.080095696834478, "learning_rate": 1.535780555923222e-05, "loss": 0.919, "step": 2416 }, { "epoch": 0.34054244452271926, "grad_norm": 1.4503214830873576, "learning_rate": 1.5353951679841385e-05, "loss": 0.569, "step": 2417 }, { "epoch": 0.34068333920394506, "grad_norm": 0.9645091510177884, "learning_rate": 1.535009668540538e-05, "loss": 0.9094, "step": 2418 }, { "epoch": 0.34082423388517086, "grad_norm": 1.0421076180555644, "learning_rate": 1.534624057672706e-05, "loss": 0.9057, "step": 2419 }, { "epoch": 0.3409651285663966, "grad_norm": 1.264408384362418, "learning_rate": 1.534238335460953e-05, "loss": 0.4623, "step": 2420 }, { "epoch": 0.3411060232476224, "grad_norm": 1.0686793272620085, "learning_rate": 1.5338525019856113e-05, "loss": 0.9609, "step": 2421 }, { "epoch": 0.3412469179288482, "grad_norm": 1.2140271214999592, "learning_rate": 1.533466557327037e-05, "loss": 0.4934, "step": 2422 }, { "epoch": 0.34138781261007395, "grad_norm": 0.9947937270517824, "learning_rate": 1.5330805015656092e-05, "loss": 0.9356, "step": 2423 }, { "epoch": 0.34152870729129975, "grad_norm": 1.033250643365059, "learning_rate": 1.5326943347817297e-05, "loss": 0.8884, "step": 2424 }, { "epoch": 0.34166960197252555, "grad_norm": 1.199750487731313, "learning_rate": 1.5323080570558247e-05, "loss": 0.902, "step": 2425 }, { "epoch": 0.3418104966537513, "grad_norm": 1.1280250483167211, "learning_rate": 1.5319216684683417e-05, "loss": 0.9143, "step": 2426 }, { "epoch": 0.3419513913349771, "grad_norm": 0.8763234910902348, "learning_rate": 1.531535169099753e-05, "loss": 0.8632, "step": 2427 }, { "epoch": 0.3420922860162029, "grad_norm": 1.3679204604542283, "learning_rate": 1.531148559030553e-05, "loss": 0.483, "step": 2428 }, { "epoch": 0.3422331806974287, "grad_norm": 1.258835609772205, "learning_rate": 1.530761838341259e-05, "loss": 0.5552, "step": 2429 }, { "epoch": 0.34237407537865444, "grad_norm": 1.4650442669310328, "learning_rate": 1.530375007112412e-05, "loss": 0.6278, "step": 2430 }, { "epoch": 0.34251497005988024, "grad_norm": 0.9962110133991177, "learning_rate": 1.529988065424576e-05, "loss": 0.944, "step": 2431 }, { "epoch": 0.34265586474110604, "grad_norm": 1.0285239282350518, "learning_rate": 1.529601013358337e-05, "loss": 0.8717, "step": 2432 }, { "epoch": 0.3427967594223318, "grad_norm": 0.962197180165096, "learning_rate": 1.5292138509943053e-05, "loss": 0.8977, "step": 2433 }, { "epoch": 0.3429376541035576, "grad_norm": 1.4568957190029204, "learning_rate": 1.5288265784131134e-05, "loss": 0.5281, "step": 2434 }, { "epoch": 0.3430785487847834, "grad_norm": 1.4846473520952888, "learning_rate": 1.528439195695417e-05, "loss": 0.5811, "step": 2435 }, { "epoch": 0.34321944346600913, "grad_norm": 0.9227192470374196, "learning_rate": 1.5280517029218947e-05, "loss": 0.8927, "step": 2436 }, { "epoch": 0.34336033814723493, "grad_norm": 1.2444563974428577, "learning_rate": 1.5276641001732478e-05, "loss": 0.5166, "step": 2437 }, { "epoch": 0.34350123282846073, "grad_norm": 1.0517032514980988, "learning_rate": 1.527276387530201e-05, "loss": 0.8691, "step": 2438 }, { "epoch": 0.34364212750968653, "grad_norm": 0.925908195552452, "learning_rate": 1.5268885650735014e-05, "loss": 0.8913, "step": 2439 }, { "epoch": 0.3437830221909123, "grad_norm": 1.138589743493273, "learning_rate": 1.526500632883919e-05, "loss": 0.5341, "step": 2440 }, { "epoch": 0.3439239168721381, "grad_norm": 1.3565487235447555, "learning_rate": 1.5261125910422478e-05, "loss": 0.6457, "step": 2441 }, { "epoch": 0.3440648115533639, "grad_norm": 1.0595871491059843, "learning_rate": 1.5257244396293023e-05, "loss": 0.9373, "step": 2442 }, { "epoch": 0.3442057062345896, "grad_norm": 0.9521801622192504, "learning_rate": 1.525336178725922e-05, "loss": 0.9283, "step": 2443 }, { "epoch": 0.3443466009158154, "grad_norm": 0.9280697180376533, "learning_rate": 1.5249478084129679e-05, "loss": 0.8594, "step": 2444 }, { "epoch": 0.3444874955970412, "grad_norm": 0.953085327800525, "learning_rate": 1.5245593287713248e-05, "loss": 0.9113, "step": 2445 }, { "epoch": 0.34462839027826697, "grad_norm": 1.0353057829992434, "learning_rate": 1.5241707398818993e-05, "loss": 0.8989, "step": 2446 }, { "epoch": 0.34476928495949277, "grad_norm": 1.0179893476027027, "learning_rate": 1.5237820418256212e-05, "loss": 0.9086, "step": 2447 }, { "epoch": 0.3449101796407186, "grad_norm": 1.0934278474041026, "learning_rate": 1.5233932346834434e-05, "loss": 0.9686, "step": 2448 }, { "epoch": 0.3450510743219444, "grad_norm": 1.2940054278775024, "learning_rate": 1.5230043185363409e-05, "loss": 0.4978, "step": 2449 }, { "epoch": 0.3451919690031701, "grad_norm": 1.3673795113219764, "learning_rate": 1.5226152934653114e-05, "loss": 0.6156, "step": 2450 }, { "epoch": 0.3453328636843959, "grad_norm": 0.9937630817893552, "learning_rate": 1.522226159551376e-05, "loss": 0.866, "step": 2451 }, { "epoch": 0.3454737583656217, "grad_norm": 0.9606647581542705, "learning_rate": 1.5218369168755779e-05, "loss": 0.91, "step": 2452 }, { "epoch": 0.34561465304684746, "grad_norm": 1.0698913372426515, "learning_rate": 1.5214475655189827e-05, "loss": 0.8664, "step": 2453 }, { "epoch": 0.34575554772807326, "grad_norm": 1.275532220075556, "learning_rate": 1.5210581055626793e-05, "loss": 0.4538, "step": 2454 }, { "epoch": 0.34589644240929907, "grad_norm": 1.191495886708577, "learning_rate": 1.5206685370877786e-05, "loss": 0.5392, "step": 2455 }, { "epoch": 0.3460373370905248, "grad_norm": 1.041507448219603, "learning_rate": 1.5202788601754146e-05, "loss": 0.8905, "step": 2456 }, { "epoch": 0.3461782317717506, "grad_norm": 1.0101626098871443, "learning_rate": 1.5198890749067436e-05, "loss": 0.8809, "step": 2457 }, { "epoch": 0.3463191264529764, "grad_norm": 1.0876151555199325, "learning_rate": 1.5194991813629449e-05, "loss": 0.9232, "step": 2458 }, { "epoch": 0.34646002113420216, "grad_norm": 0.9478027912470328, "learning_rate": 1.51910917962522e-05, "loss": 0.9091, "step": 2459 }, { "epoch": 0.34660091581542796, "grad_norm": 1.0384215195909057, "learning_rate": 1.5187190697747922e-05, "loss": 0.8659, "step": 2460 }, { "epoch": 0.34674181049665376, "grad_norm": 1.0210449678406603, "learning_rate": 1.5183288518929085e-05, "loss": 0.8824, "step": 2461 }, { "epoch": 0.34688270517787956, "grad_norm": 1.3866345085300926, "learning_rate": 1.5179385260608382e-05, "loss": 0.526, "step": 2462 }, { "epoch": 0.3470235998591053, "grad_norm": 1.001687113423918, "learning_rate": 1.5175480923598725e-05, "loss": 0.8799, "step": 2463 }, { "epoch": 0.3471644945403311, "grad_norm": 0.871437661966555, "learning_rate": 1.5171575508713257e-05, "loss": 0.9068, "step": 2464 }, { "epoch": 0.3473053892215569, "grad_norm": 1.000033123099191, "learning_rate": 1.5167669016765336e-05, "loss": 0.8526, "step": 2465 }, { "epoch": 0.34744628390278265, "grad_norm": 1.4219900704776185, "learning_rate": 1.5163761448568558e-05, "loss": 0.5019, "step": 2466 }, { "epoch": 0.34758717858400845, "grad_norm": 1.0285152971682134, "learning_rate": 1.5159852804936732e-05, "loss": 0.9374, "step": 2467 }, { "epoch": 0.34772807326523425, "grad_norm": 1.0005853571430097, "learning_rate": 1.5155943086683898e-05, "loss": 0.8988, "step": 2468 }, { "epoch": 0.34786896794646, "grad_norm": 0.9840391836925042, "learning_rate": 1.5152032294624313e-05, "loss": 0.9204, "step": 2469 }, { "epoch": 0.3480098626276858, "grad_norm": 1.2469236480366974, "learning_rate": 1.5148120429572464e-05, "loss": 0.9221, "step": 2470 }, { "epoch": 0.3481507573089116, "grad_norm": 1.1361245637720876, "learning_rate": 1.5144207492343058e-05, "loss": 0.9411, "step": 2471 }, { "epoch": 0.3482916519901374, "grad_norm": 1.2638177037279832, "learning_rate": 1.5140293483751025e-05, "loss": 0.59, "step": 2472 }, { "epoch": 0.34843254667136314, "grad_norm": 1.1477752580261207, "learning_rate": 1.5136378404611521e-05, "loss": 0.9243, "step": 2473 }, { "epoch": 0.34857344135258894, "grad_norm": 1.3844216748782134, "learning_rate": 1.5132462255739922e-05, "loss": 0.5701, "step": 2474 }, { "epoch": 0.34871433603381474, "grad_norm": 1.2290278411716906, "learning_rate": 1.5128545037951827e-05, "loss": 0.5899, "step": 2475 }, { "epoch": 0.3488552307150405, "grad_norm": 1.0295591740529326, "learning_rate": 1.5124626752063061e-05, "loss": 0.9023, "step": 2476 }, { "epoch": 0.3489961253962663, "grad_norm": 1.0729586071334414, "learning_rate": 1.512070739888967e-05, "loss": 0.9471, "step": 2477 }, { "epoch": 0.3491370200774921, "grad_norm": 0.9835066556979064, "learning_rate": 1.5116786979247917e-05, "loss": 0.92, "step": 2478 }, { "epoch": 0.34927791475871783, "grad_norm": 1.0700157113479594, "learning_rate": 1.5112865493954293e-05, "loss": 0.9616, "step": 2479 }, { "epoch": 0.34941880943994363, "grad_norm": 1.087277201077758, "learning_rate": 1.5108942943825513e-05, "loss": 0.9801, "step": 2480 }, { "epoch": 0.34955970412116943, "grad_norm": 1.0157851397100364, "learning_rate": 1.5105019329678507e-05, "loss": 0.943, "step": 2481 }, { "epoch": 0.34970059880239523, "grad_norm": 0.9020778598631868, "learning_rate": 1.5101094652330427e-05, "loss": 0.9364, "step": 2482 }, { "epoch": 0.349841493483621, "grad_norm": 0.9872159915476755, "learning_rate": 1.5097168912598657e-05, "loss": 0.8839, "step": 2483 }, { "epoch": 0.3499823881648468, "grad_norm": 1.620839466632486, "learning_rate": 1.5093242111300788e-05, "loss": 0.6263, "step": 2484 }, { "epoch": 0.3501232828460726, "grad_norm": 0.9816876048631428, "learning_rate": 1.5089314249254638e-05, "loss": 0.8629, "step": 2485 }, { "epoch": 0.3502641775272983, "grad_norm": 0.9042739070185415, "learning_rate": 1.508538532727825e-05, "loss": 0.8644, "step": 2486 }, { "epoch": 0.3504050722085241, "grad_norm": 0.9729405197715171, "learning_rate": 1.5081455346189884e-05, "loss": 0.8703, "step": 2487 }, { "epoch": 0.3505459668897499, "grad_norm": 0.9924514523983359, "learning_rate": 1.5077524306808019e-05, "loss": 0.9309, "step": 2488 }, { "epoch": 0.35068686157097567, "grad_norm": 1.0812643684127508, "learning_rate": 1.5073592209951354e-05, "loss": 0.9352, "step": 2489 }, { "epoch": 0.3508277562522015, "grad_norm": 1.3736955401019084, "learning_rate": 1.5069659056438817e-05, "loss": 0.5681, "step": 2490 }, { "epoch": 0.3509686509334273, "grad_norm": 1.0054671927849779, "learning_rate": 1.5065724847089542e-05, "loss": 0.9425, "step": 2491 }, { "epoch": 0.3511095456146531, "grad_norm": 1.081520099680155, "learning_rate": 1.50617895827229e-05, "loss": 0.8694, "step": 2492 }, { "epoch": 0.3512504402958788, "grad_norm": 1.3045406921908242, "learning_rate": 1.505785326415846e-05, "loss": 0.6008, "step": 2493 }, { "epoch": 0.3513913349771046, "grad_norm": 1.2878640767032676, "learning_rate": 1.505391589221603e-05, "loss": 0.5209, "step": 2494 }, { "epoch": 0.3515322296583304, "grad_norm": 1.025873999823699, "learning_rate": 1.504997746771563e-05, "loss": 0.8849, "step": 2495 }, { "epoch": 0.35167312433955616, "grad_norm": 0.9789133142490666, "learning_rate": 1.5046037991477499e-05, "loss": 0.9102, "step": 2496 }, { "epoch": 0.35181401902078197, "grad_norm": 1.040640475314033, "learning_rate": 1.5042097464322092e-05, "loss": 0.8777, "step": 2497 }, { "epoch": 0.35195491370200777, "grad_norm": 1.0265258087648819, "learning_rate": 1.5038155887070089e-05, "loss": 0.932, "step": 2498 }, { "epoch": 0.3520958083832335, "grad_norm": 1.253692805792949, "learning_rate": 1.5034213260542383e-05, "loss": 0.8615, "step": 2499 }, { "epoch": 0.3522367030644593, "grad_norm": 1.27508229513961, "learning_rate": 1.5030269585560096e-05, "loss": 0.5169, "step": 2500 }, { "epoch": 0.3523775977456851, "grad_norm": 1.3026272449805534, "learning_rate": 1.5026324862944552e-05, "loss": 0.5987, "step": 2501 }, { "epoch": 0.35251849242691086, "grad_norm": 1.0378703736016395, "learning_rate": 1.5022379093517306e-05, "loss": 0.9082, "step": 2502 }, { "epoch": 0.35265938710813666, "grad_norm": 1.0643741031232516, "learning_rate": 1.5018432278100128e-05, "loss": 0.9282, "step": 2503 }, { "epoch": 0.35280028178936246, "grad_norm": 1.0034312739540732, "learning_rate": 1.5014484417514999e-05, "loss": 0.9129, "step": 2504 }, { "epoch": 0.35294117647058826, "grad_norm": 1.0401177573100042, "learning_rate": 1.501053551258413e-05, "loss": 0.8422, "step": 2505 }, { "epoch": 0.353082071151814, "grad_norm": 1.1186360815909329, "learning_rate": 1.5006585564129938e-05, "loss": 0.9986, "step": 2506 }, { "epoch": 0.3532229658330398, "grad_norm": 1.1094201118710771, "learning_rate": 1.5002634572975065e-05, "loss": 0.8735, "step": 2507 }, { "epoch": 0.3533638605142656, "grad_norm": 0.9498044841872176, "learning_rate": 1.499868253994237e-05, "loss": 0.8815, "step": 2508 }, { "epoch": 0.35350475519549135, "grad_norm": 1.0694870755693198, "learning_rate": 1.499472946585492e-05, "loss": 0.9155, "step": 2509 }, { "epoch": 0.35364564987671715, "grad_norm": 1.3506976443439054, "learning_rate": 1.4990775351536014e-05, "loss": 0.7153, "step": 2510 }, { "epoch": 0.35378654455794295, "grad_norm": 0.9614218368580868, "learning_rate": 1.4986820197809149e-05, "loss": 0.9104, "step": 2511 }, { "epoch": 0.3539274392391687, "grad_norm": 1.198548214190066, "learning_rate": 1.4982864005498052e-05, "loss": 0.8614, "step": 2512 }, { "epoch": 0.3540683339203945, "grad_norm": 1.0168904818001012, "learning_rate": 1.4978906775426667e-05, "loss": 0.8407, "step": 2513 }, { "epoch": 0.3542092286016203, "grad_norm": 1.3697325279286754, "learning_rate": 1.4974948508419146e-05, "loss": 0.5332, "step": 2514 }, { "epoch": 0.3543501232828461, "grad_norm": 0.9300840858074935, "learning_rate": 1.4970989205299857e-05, "loss": 0.8858, "step": 2515 }, { "epoch": 0.35449101796407184, "grad_norm": 1.537786930343967, "learning_rate": 1.4967028866893399e-05, "loss": 0.5869, "step": 2516 }, { "epoch": 0.35463191264529764, "grad_norm": 1.2307871603000122, "learning_rate": 1.496306749402456e-05, "loss": 0.9344, "step": 2517 }, { "epoch": 0.35477280732652344, "grad_norm": 1.2737443060947677, "learning_rate": 1.4959105087518372e-05, "loss": 0.478, "step": 2518 }, { "epoch": 0.3549137020077492, "grad_norm": 1.1215208421458618, "learning_rate": 1.4955141648200063e-05, "loss": 0.9297, "step": 2519 }, { "epoch": 0.355054596688975, "grad_norm": 1.1427782049523105, "learning_rate": 1.495117717689508e-05, "loss": 0.8627, "step": 2520 }, { "epoch": 0.3551954913702008, "grad_norm": 1.2565448497861729, "learning_rate": 1.4947211674429091e-05, "loss": 0.4621, "step": 2521 }, { "epoch": 0.35533638605142653, "grad_norm": 0.9956384503323457, "learning_rate": 1.494324514162797e-05, "loss": 0.8606, "step": 2522 }, { "epoch": 0.35547728073265233, "grad_norm": 1.1374499789537384, "learning_rate": 1.4939277579317816e-05, "loss": 0.9619, "step": 2523 }, { "epoch": 0.35561817541387813, "grad_norm": 1.397713840996951, "learning_rate": 1.4935308988324935e-05, "loss": 0.5092, "step": 2524 }, { "epoch": 0.35575907009510394, "grad_norm": 0.9091695793254853, "learning_rate": 1.4931339369475843e-05, "loss": 0.9476, "step": 2525 }, { "epoch": 0.3558999647763297, "grad_norm": 1.0858332559681356, "learning_rate": 1.4927368723597287e-05, "loss": 0.958, "step": 2526 }, { "epoch": 0.3560408594575555, "grad_norm": 1.0765953645630242, "learning_rate": 1.4923397051516204e-05, "loss": 0.9392, "step": 2527 }, { "epoch": 0.3561817541387813, "grad_norm": 1.0036234562848767, "learning_rate": 1.4919424354059769e-05, "loss": 0.8888, "step": 2528 }, { "epoch": 0.356322648820007, "grad_norm": 1.3366242568581403, "learning_rate": 1.4915450632055351e-05, "loss": 0.5927, "step": 2529 }, { "epoch": 0.3564635435012328, "grad_norm": 1.3237218209242134, "learning_rate": 1.4911475886330542e-05, "loss": 0.5157, "step": 2530 }, { "epoch": 0.3566044381824586, "grad_norm": 1.1527026892171996, "learning_rate": 1.4907500117713152e-05, "loss": 0.8348, "step": 2531 }, { "epoch": 0.3567453328636844, "grad_norm": 0.9451787662123764, "learning_rate": 1.4903523327031191e-05, "loss": 0.941, "step": 2532 }, { "epoch": 0.3568862275449102, "grad_norm": 0.9918703690584009, "learning_rate": 1.489954551511289e-05, "loss": 0.8175, "step": 2533 }, { "epoch": 0.357027122226136, "grad_norm": 1.0636324387010094, "learning_rate": 1.4895566682786693e-05, "loss": 0.9654, "step": 2534 }, { "epoch": 0.3571680169073618, "grad_norm": 1.0224224342099244, "learning_rate": 1.489158683088125e-05, "loss": 0.9422, "step": 2535 }, { "epoch": 0.3573089115885875, "grad_norm": 0.8836432432682866, "learning_rate": 1.4887605960225438e-05, "loss": 0.9389, "step": 2536 }, { "epoch": 0.3574498062698133, "grad_norm": 1.5084773020332738, "learning_rate": 1.4883624071648327e-05, "loss": 0.5088, "step": 2537 }, { "epoch": 0.3575907009510391, "grad_norm": 0.9378443600714738, "learning_rate": 1.487964116597921e-05, "loss": 0.8536, "step": 2538 }, { "epoch": 0.35773159563226486, "grad_norm": 0.9650894885170309, "learning_rate": 1.4875657244047595e-05, "loss": 0.9474, "step": 2539 }, { "epoch": 0.35787249031349067, "grad_norm": 0.9486041819144156, "learning_rate": 1.4871672306683192e-05, "loss": 0.8381, "step": 2540 }, { "epoch": 0.35801338499471647, "grad_norm": 1.3528665775467066, "learning_rate": 1.486768635471593e-05, "loss": 0.6327, "step": 2541 }, { "epoch": 0.3581542796759422, "grad_norm": 0.974463084127153, "learning_rate": 1.486369938897594e-05, "loss": 0.8876, "step": 2542 }, { "epoch": 0.358295174357168, "grad_norm": 0.9465624137110271, "learning_rate": 1.4859711410293582e-05, "loss": 0.9516, "step": 2543 }, { "epoch": 0.3584360690383938, "grad_norm": 1.3186710929399605, "learning_rate": 1.4855722419499407e-05, "loss": 0.4527, "step": 2544 }, { "epoch": 0.35857696371961956, "grad_norm": 0.9885080639507836, "learning_rate": 1.4851732417424189e-05, "loss": 0.9027, "step": 2545 }, { "epoch": 0.35871785840084536, "grad_norm": 1.2579527682432916, "learning_rate": 1.4847741404898907e-05, "loss": 0.5097, "step": 2546 }, { "epoch": 0.35885875308207116, "grad_norm": 1.0918831242337208, "learning_rate": 1.4843749382754755e-05, "loss": 0.8706, "step": 2547 }, { "epoch": 0.35899964776329696, "grad_norm": 1.0275649841796048, "learning_rate": 1.4839756351823133e-05, "loss": 0.8705, "step": 2548 }, { "epoch": 0.3591405424445227, "grad_norm": 1.0519869705322653, "learning_rate": 1.4835762312935655e-05, "loss": 0.9108, "step": 2549 }, { "epoch": 0.3592814371257485, "grad_norm": 1.5977869120468935, "learning_rate": 1.483176726692414e-05, "loss": 0.5641, "step": 2550 }, { "epoch": 0.3594223318069743, "grad_norm": 1.051307469200583, "learning_rate": 1.4827771214620622e-05, "loss": 0.8476, "step": 2551 }, { "epoch": 0.35956322648820005, "grad_norm": 1.1493803045132303, "learning_rate": 1.482377415685734e-05, "loss": 0.9308, "step": 2552 }, { "epoch": 0.35970412116942585, "grad_norm": 1.4098710743136402, "learning_rate": 1.4819776094466743e-05, "loss": 0.5708, "step": 2553 }, { "epoch": 0.35984501585065165, "grad_norm": 1.2773481023204813, "learning_rate": 1.4815777028281497e-05, "loss": 0.5745, "step": 2554 }, { "epoch": 0.3599859105318774, "grad_norm": 1.0889786859407489, "learning_rate": 1.4811776959134465e-05, "loss": 0.9178, "step": 2555 }, { "epoch": 0.3601268052131032, "grad_norm": 1.0966013940868116, "learning_rate": 1.4807775887858726e-05, "loss": 0.9023, "step": 2556 }, { "epoch": 0.360267699894329, "grad_norm": 1.0171722787874211, "learning_rate": 1.480377381528757e-05, "loss": 0.9107, "step": 2557 }, { "epoch": 0.3604085945755548, "grad_norm": 0.9297722979331552, "learning_rate": 1.4799770742254489e-05, "loss": 0.8386, "step": 2558 }, { "epoch": 0.36054948925678054, "grad_norm": 1.178866365851904, "learning_rate": 1.4795766669593186e-05, "loss": 0.5488, "step": 2559 }, { "epoch": 0.36069038393800634, "grad_norm": 1.3149578003726279, "learning_rate": 1.479176159813757e-05, "loss": 0.5497, "step": 2560 }, { "epoch": 0.36083127861923214, "grad_norm": 0.9983389814912752, "learning_rate": 1.4787755528721765e-05, "loss": 0.8714, "step": 2561 }, { "epoch": 0.3609721733004579, "grad_norm": 1.0500357653872703, "learning_rate": 1.47837484621801e-05, "loss": 0.8943, "step": 2562 }, { "epoch": 0.3611130679816837, "grad_norm": 1.0478579808286554, "learning_rate": 1.4779740399347105e-05, "loss": 0.8354, "step": 2563 }, { "epoch": 0.3612539626629095, "grad_norm": 1.036771801019865, "learning_rate": 1.4775731341057524e-05, "loss": 0.9237, "step": 2564 }, { "epoch": 0.36139485734413523, "grad_norm": 1.2111180298287012, "learning_rate": 1.4771721288146306e-05, "loss": 0.4711, "step": 2565 }, { "epoch": 0.36153575202536103, "grad_norm": 1.1132708776358775, "learning_rate": 1.476771024144861e-05, "loss": 0.8812, "step": 2566 }, { "epoch": 0.36167664670658684, "grad_norm": 1.0083891579085642, "learning_rate": 1.4763698201799801e-05, "loss": 0.8649, "step": 2567 }, { "epoch": 0.36181754138781264, "grad_norm": 1.0568278091418604, "learning_rate": 1.4759685170035445e-05, "loss": 0.9016, "step": 2568 }, { "epoch": 0.3619584360690384, "grad_norm": 1.4057000301154159, "learning_rate": 1.4755671146991327e-05, "loss": 0.557, "step": 2569 }, { "epoch": 0.3620993307502642, "grad_norm": 0.9577491580584622, "learning_rate": 1.4751656133503422e-05, "loss": 0.8433, "step": 2570 }, { "epoch": 0.36224022543149, "grad_norm": 1.2567036498595312, "learning_rate": 1.4747640130407922e-05, "loss": 0.5806, "step": 2571 }, { "epoch": 0.3623811201127157, "grad_norm": 0.9165342199645223, "learning_rate": 1.4743623138541232e-05, "loss": 0.8347, "step": 2572 }, { "epoch": 0.3625220147939415, "grad_norm": 1.0884326742727741, "learning_rate": 1.4739605158739943e-05, "loss": 0.9465, "step": 2573 }, { "epoch": 0.3626629094751673, "grad_norm": 1.2118806648809828, "learning_rate": 1.4735586191840869e-05, "loss": 0.8582, "step": 2574 }, { "epoch": 0.3628038041563931, "grad_norm": 1.3257075231212327, "learning_rate": 1.4731566238681023e-05, "loss": 0.5497, "step": 2575 }, { "epoch": 0.3629446988376189, "grad_norm": 1.0321188959141163, "learning_rate": 1.4727545300097621e-05, "loss": 0.9363, "step": 2576 }, { "epoch": 0.3630855935188447, "grad_norm": 0.9716337041028095, "learning_rate": 1.4723523376928091e-05, "loss": 0.8834, "step": 2577 }, { "epoch": 0.3632264882000705, "grad_norm": 1.3234289586334531, "learning_rate": 1.4719500470010058e-05, "loss": 0.5791, "step": 2578 }, { "epoch": 0.3633673828812962, "grad_norm": 0.8988696708205421, "learning_rate": 1.4715476580181356e-05, "loss": 0.8995, "step": 2579 }, { "epoch": 0.363508277562522, "grad_norm": 1.0351808061876255, "learning_rate": 1.4711451708280028e-05, "loss": 0.889, "step": 2580 }, { "epoch": 0.3636491722437478, "grad_norm": 1.2081684693248766, "learning_rate": 1.4707425855144317e-05, "loss": 0.5215, "step": 2581 }, { "epoch": 0.36379006692497357, "grad_norm": 0.9227753034532132, "learning_rate": 1.4703399021612667e-05, "loss": 0.9035, "step": 2582 }, { "epoch": 0.36393096160619937, "grad_norm": 1.1431674778248784, "learning_rate": 1.4699371208523734e-05, "loss": 0.9265, "step": 2583 }, { "epoch": 0.36407185628742517, "grad_norm": 1.0027645947634427, "learning_rate": 1.4695342416716367e-05, "loss": 0.8995, "step": 2584 }, { "epoch": 0.3642127509686509, "grad_norm": 0.9601008687339556, "learning_rate": 1.4691312647029636e-05, "loss": 0.9051, "step": 2585 }, { "epoch": 0.3643536456498767, "grad_norm": 1.41806733940843, "learning_rate": 1.4687281900302794e-05, "loss": 0.5949, "step": 2586 }, { "epoch": 0.3644945403311025, "grad_norm": 1.4376468943376892, "learning_rate": 1.4683250177375311e-05, "loss": 0.6019, "step": 2587 }, { "epoch": 0.36463543501232826, "grad_norm": 0.98493307156343, "learning_rate": 1.4679217479086863e-05, "loss": 0.8282, "step": 2588 }, { "epoch": 0.36477632969355406, "grad_norm": 0.9633157634778832, "learning_rate": 1.4675183806277313e-05, "loss": 0.8737, "step": 2589 }, { "epoch": 0.36491722437477986, "grad_norm": 1.0911447449407499, "learning_rate": 1.4671149159786748e-05, "loss": 0.8829, "step": 2590 }, { "epoch": 0.36505811905600566, "grad_norm": 1.0015598454991215, "learning_rate": 1.4667113540455438e-05, "loss": 0.9003, "step": 2591 }, { "epoch": 0.3651990137372314, "grad_norm": 1.1140177290121112, "learning_rate": 1.466307694912387e-05, "loss": 0.8978, "step": 2592 }, { "epoch": 0.3653399084184572, "grad_norm": 1.000174455974953, "learning_rate": 1.4659039386632725e-05, "loss": 0.9013, "step": 2593 }, { "epoch": 0.365480803099683, "grad_norm": 0.9946433303179824, "learning_rate": 1.465500085382289e-05, "loss": 0.9084, "step": 2594 }, { "epoch": 0.36562169778090875, "grad_norm": 1.4051791706998007, "learning_rate": 1.4650961351535455e-05, "loss": 0.5311, "step": 2595 }, { "epoch": 0.36576259246213455, "grad_norm": 0.9614517272999572, "learning_rate": 1.464692088061171e-05, "loss": 0.8932, "step": 2596 }, { "epoch": 0.36590348714336035, "grad_norm": 1.4760038953910795, "learning_rate": 1.4642879441893146e-05, "loss": 0.4926, "step": 2597 }, { "epoch": 0.3660443818245861, "grad_norm": 1.2860290888248638, "learning_rate": 1.4638837036221456e-05, "loss": 0.5363, "step": 2598 }, { "epoch": 0.3661852765058119, "grad_norm": 1.3838047101285842, "learning_rate": 1.4634793664438536e-05, "loss": 0.5882, "step": 2599 }, { "epoch": 0.3663261711870377, "grad_norm": 1.2551064160794327, "learning_rate": 1.4630749327386482e-05, "loss": 0.9349, "step": 2600 }, { "epoch": 0.3664670658682635, "grad_norm": 1.0782107955833207, "learning_rate": 1.4626704025907591e-05, "loss": 0.8816, "step": 2601 }, { "epoch": 0.36660796054948924, "grad_norm": 1.2679371528836254, "learning_rate": 1.4622657760844364e-05, "loss": 0.9007, "step": 2602 }, { "epoch": 0.36674885523071504, "grad_norm": 1.1039595321548838, "learning_rate": 1.4618610533039494e-05, "loss": 0.9425, "step": 2603 }, { "epoch": 0.36688974991194084, "grad_norm": 1.0278142535507109, "learning_rate": 1.4614562343335883e-05, "loss": 0.8645, "step": 2604 }, { "epoch": 0.3670306445931666, "grad_norm": 0.9989043218510216, "learning_rate": 1.461051319257663e-05, "loss": 0.8967, "step": 2605 }, { "epoch": 0.3671715392743924, "grad_norm": 1.289042309548499, "learning_rate": 1.460646308160504e-05, "loss": 0.468, "step": 2606 }, { "epoch": 0.3673124339556182, "grad_norm": 0.9811785171180903, "learning_rate": 1.4602412011264604e-05, "loss": 0.8858, "step": 2607 }, { "epoch": 0.36745332863684393, "grad_norm": 1.002871048711499, "learning_rate": 1.459835998239903e-05, "loss": 0.9192, "step": 2608 }, { "epoch": 0.36759422331806973, "grad_norm": 0.979863836985097, "learning_rate": 1.4594306995852208e-05, "loss": 0.9392, "step": 2609 }, { "epoch": 0.36773511799929554, "grad_norm": 0.9327419378489972, "learning_rate": 1.4590253052468244e-05, "loss": 0.8691, "step": 2610 }, { "epoch": 0.36787601268052134, "grad_norm": 0.8857260525454457, "learning_rate": 1.4586198153091433e-05, "loss": 0.906, "step": 2611 }, { "epoch": 0.3680169073617471, "grad_norm": 0.9248919436580549, "learning_rate": 1.4582142298566272e-05, "loss": 0.8493, "step": 2612 }, { "epoch": 0.3681578020429729, "grad_norm": 0.9913302860540321, "learning_rate": 1.4578085489737456e-05, "loss": 0.8954, "step": 2613 }, { "epoch": 0.3682986967241987, "grad_norm": 1.4715108131915136, "learning_rate": 1.4574027727449885e-05, "loss": 0.491, "step": 2614 }, { "epoch": 0.3684395914054244, "grad_norm": 0.9847441499143073, "learning_rate": 1.4569969012548644e-05, "loss": 0.9197, "step": 2615 }, { "epoch": 0.3685804860866502, "grad_norm": 1.0732831800709473, "learning_rate": 1.456590934587903e-05, "loss": 0.9056, "step": 2616 }, { "epoch": 0.36872138076787603, "grad_norm": 0.9806828379762881, "learning_rate": 1.4561848728286532e-05, "loss": 0.8899, "step": 2617 }, { "epoch": 0.3688622754491018, "grad_norm": 1.059249606341831, "learning_rate": 1.4557787160616838e-05, "loss": 0.8598, "step": 2618 }, { "epoch": 0.3690031701303276, "grad_norm": 1.2459470583530678, "learning_rate": 1.4553724643715832e-05, "loss": 0.4765, "step": 2619 }, { "epoch": 0.3691440648115534, "grad_norm": 1.1157625263288482, "learning_rate": 1.4549661178429597e-05, "loss": 0.9728, "step": 2620 }, { "epoch": 0.3692849594927792, "grad_norm": 0.9968936260985282, "learning_rate": 1.454559676560442e-05, "loss": 0.908, "step": 2621 }, { "epoch": 0.3694258541740049, "grad_norm": 1.0895596642936436, "learning_rate": 1.4541531406086771e-05, "loss": 0.8767, "step": 2622 }, { "epoch": 0.3695667488552307, "grad_norm": 1.2400263228293107, "learning_rate": 1.453746510072333e-05, "loss": 0.9239, "step": 2623 }, { "epoch": 0.3697076435364565, "grad_norm": 1.007703385789011, "learning_rate": 1.4533397850360969e-05, "loss": 0.9321, "step": 2624 }, { "epoch": 0.36984853821768227, "grad_norm": 0.9502560722264068, "learning_rate": 1.4529329655846755e-05, "loss": 0.9017, "step": 2625 }, { "epoch": 0.36998943289890807, "grad_norm": 1.0901486582036684, "learning_rate": 1.4525260518027961e-05, "loss": 0.9299, "step": 2626 }, { "epoch": 0.37013032758013387, "grad_norm": 1.4540887704699854, "learning_rate": 1.4521190437752042e-05, "loss": 0.5364, "step": 2627 }, { "epoch": 0.3702712222613596, "grad_norm": 1.2953882891223465, "learning_rate": 1.4517119415866659e-05, "loss": 0.7196, "step": 2628 }, { "epoch": 0.3704121169425854, "grad_norm": 1.048532723366307, "learning_rate": 1.4513047453219666e-05, "loss": 0.9255, "step": 2629 }, { "epoch": 0.3705530116238112, "grad_norm": 0.9685274232908208, "learning_rate": 1.4508974550659114e-05, "loss": 0.9272, "step": 2630 }, { "epoch": 0.37069390630503696, "grad_norm": 0.983561945764653, "learning_rate": 1.450490070903325e-05, "loss": 0.8738, "step": 2631 }, { "epoch": 0.37083480098626276, "grad_norm": 1.1811075011900813, "learning_rate": 1.4500825929190517e-05, "loss": 0.907, "step": 2632 }, { "epoch": 0.37097569566748856, "grad_norm": 1.0361627088113154, "learning_rate": 1.449675021197955e-05, "loss": 0.9041, "step": 2633 }, { "epoch": 0.37111659034871436, "grad_norm": 0.9438738961498532, "learning_rate": 1.4492673558249184e-05, "loss": 0.9038, "step": 2634 }, { "epoch": 0.3712574850299401, "grad_norm": 1.0007041863790542, "learning_rate": 1.4488595968848442e-05, "loss": 0.8091, "step": 2635 }, { "epoch": 0.3713983797111659, "grad_norm": 1.52168537136729, "learning_rate": 1.4484517444626552e-05, "loss": 0.6179, "step": 2636 }, { "epoch": 0.3715392743923917, "grad_norm": 1.3365429030954348, "learning_rate": 1.448043798643293e-05, "loss": 0.5187, "step": 2637 }, { "epoch": 0.37168016907361745, "grad_norm": 1.3917811444658565, "learning_rate": 1.4476357595117185e-05, "loss": 0.6238, "step": 2638 }, { "epoch": 0.37182106375484325, "grad_norm": 1.1469964956674545, "learning_rate": 1.4472276271529127e-05, "loss": 0.5133, "step": 2639 }, { "epoch": 0.37196195843606905, "grad_norm": 1.3145653032939741, "learning_rate": 1.446819401651875e-05, "loss": 0.5882, "step": 2640 }, { "epoch": 0.3721028531172948, "grad_norm": 1.0348199809591745, "learning_rate": 1.4464110830936258e-05, "loss": 0.8538, "step": 2641 }, { "epoch": 0.3722437477985206, "grad_norm": 1.0262246516208704, "learning_rate": 1.446002671563203e-05, "loss": 0.8799, "step": 2642 }, { "epoch": 0.3723846424797464, "grad_norm": 1.1200688606185631, "learning_rate": 1.4455941671456653e-05, "loss": 0.8911, "step": 2643 }, { "epoch": 0.3725255371609722, "grad_norm": 1.275194504764262, "learning_rate": 1.4451855699260899e-05, "loss": 0.5943, "step": 2644 }, { "epoch": 0.37266643184219794, "grad_norm": 1.0938736024877755, "learning_rate": 1.4447768799895736e-05, "loss": 0.8951, "step": 2645 }, { "epoch": 0.37280732652342374, "grad_norm": 1.0517241810009998, "learning_rate": 1.444368097421233e-05, "loss": 0.9056, "step": 2646 }, { "epoch": 0.37294822120464954, "grad_norm": 1.0489671870345518, "learning_rate": 1.4439592223062032e-05, "loss": 0.9449, "step": 2647 }, { "epoch": 0.3730891158858753, "grad_norm": 1.2767380843050218, "learning_rate": 1.4435502547296385e-05, "loss": 0.6517, "step": 2648 }, { "epoch": 0.3732300105671011, "grad_norm": 1.0830491260216508, "learning_rate": 1.4431411947767137e-05, "loss": 0.8599, "step": 2649 }, { "epoch": 0.3733709052483269, "grad_norm": 0.996122488413566, "learning_rate": 1.4427320425326217e-05, "loss": 0.8804, "step": 2650 }, { "epoch": 0.37351179992955263, "grad_norm": 1.3450719171829433, "learning_rate": 1.4423227980825747e-05, "loss": 0.6483, "step": 2651 }, { "epoch": 0.37365269461077844, "grad_norm": 0.936253141098463, "learning_rate": 1.4419134615118047e-05, "loss": 0.9106, "step": 2652 }, { "epoch": 0.37379358929200424, "grad_norm": 1.039643406875286, "learning_rate": 1.4415040329055621e-05, "loss": 0.9493, "step": 2653 }, { "epoch": 0.37393448397323004, "grad_norm": 0.8968508078746353, "learning_rate": 1.441094512349117e-05, "loss": 0.8591, "step": 2654 }, { "epoch": 0.3740753786544558, "grad_norm": 1.0034075557246667, "learning_rate": 1.4406848999277592e-05, "loss": 0.8836, "step": 2655 }, { "epoch": 0.3742162733356816, "grad_norm": 0.9203199081607838, "learning_rate": 1.4402751957267958e-05, "loss": 0.8639, "step": 2656 }, { "epoch": 0.3743571680169074, "grad_norm": 1.0489629598402, "learning_rate": 1.4398653998315552e-05, "loss": 0.8617, "step": 2657 }, { "epoch": 0.3744980626981331, "grad_norm": 1.010970045647804, "learning_rate": 1.4394555123273832e-05, "loss": 0.921, "step": 2658 }, { "epoch": 0.37463895737935893, "grad_norm": 1.001608986832933, "learning_rate": 1.4390455332996455e-05, "loss": 0.9475, "step": 2659 }, { "epoch": 0.37477985206058473, "grad_norm": 1.3721819955341574, "learning_rate": 1.4386354628337274e-05, "loss": 0.9345, "step": 2660 }, { "epoch": 0.3749207467418105, "grad_norm": 1.337763316847543, "learning_rate": 1.4382253010150317e-05, "loss": 0.5875, "step": 2661 }, { "epoch": 0.3750616414230363, "grad_norm": 0.9984484778069066, "learning_rate": 1.4378150479289814e-05, "loss": 0.8792, "step": 2662 }, { "epoch": 0.3752025361042621, "grad_norm": 0.9260607899215897, "learning_rate": 1.4374047036610184e-05, "loss": 0.8756, "step": 2663 }, { "epoch": 0.3753434307854879, "grad_norm": 1.0355763191624208, "learning_rate": 1.4369942682966027e-05, "loss": 0.9243, "step": 2664 }, { "epoch": 0.3754843254667136, "grad_norm": 0.9644737034090365, "learning_rate": 1.436583741921215e-05, "loss": 0.9169, "step": 2665 }, { "epoch": 0.3756252201479394, "grad_norm": 1.0419292997050724, "learning_rate": 1.4361731246203531e-05, "loss": 0.8952, "step": 2666 }, { "epoch": 0.3757661148291652, "grad_norm": 1.4056284295789896, "learning_rate": 1.4357624164795349e-05, "loss": 0.6277, "step": 2667 }, { "epoch": 0.37590700951039097, "grad_norm": 1.442633726929044, "learning_rate": 1.435351617584297e-05, "loss": 0.5645, "step": 2668 }, { "epoch": 0.37604790419161677, "grad_norm": 0.9903541744580975, "learning_rate": 1.4349407280201944e-05, "loss": 0.8294, "step": 2669 }, { "epoch": 0.37618879887284257, "grad_norm": 1.2757965731171155, "learning_rate": 1.4345297478728016e-05, "loss": 0.6324, "step": 2670 }, { "epoch": 0.3763296935540683, "grad_norm": 1.458630837810372, "learning_rate": 1.4341186772277113e-05, "loss": 0.4922, "step": 2671 }, { "epoch": 0.3764705882352941, "grad_norm": 1.0433304735755928, "learning_rate": 1.4337075161705361e-05, "loss": 0.9363, "step": 2672 }, { "epoch": 0.3766114829165199, "grad_norm": 0.9409582078356701, "learning_rate": 1.4332962647869066e-05, "loss": 0.8954, "step": 2673 }, { "epoch": 0.37675237759774566, "grad_norm": 0.973075230083151, "learning_rate": 1.432884923162472e-05, "loss": 0.9226, "step": 2674 }, { "epoch": 0.37689327227897146, "grad_norm": 0.9414746848467365, "learning_rate": 1.4324734913829013e-05, "loss": 0.9088, "step": 2675 }, { "epoch": 0.37703416696019726, "grad_norm": 1.4555661470301429, "learning_rate": 1.4320619695338813e-05, "loss": 0.4901, "step": 2676 }, { "epoch": 0.37717506164142306, "grad_norm": 0.9064323825597219, "learning_rate": 1.4316503577011179e-05, "loss": 0.895, "step": 2677 }, { "epoch": 0.3773159563226488, "grad_norm": 1.0250316337756322, "learning_rate": 1.4312386559703361e-05, "loss": 0.872, "step": 2678 }, { "epoch": 0.3774568510038746, "grad_norm": 0.9837004849600032, "learning_rate": 1.430826864427279e-05, "loss": 0.8765, "step": 2679 }, { "epoch": 0.3775977456851004, "grad_norm": 1.0114991317782192, "learning_rate": 1.430414983157709e-05, "loss": 0.8988, "step": 2680 }, { "epoch": 0.37773864036632615, "grad_norm": 1.2878190580839464, "learning_rate": 1.4300030122474065e-05, "loss": 0.6067, "step": 2681 }, { "epoch": 0.37787953504755195, "grad_norm": 0.9972573449716491, "learning_rate": 1.4295909517821711e-05, "loss": 0.898, "step": 2682 }, { "epoch": 0.37802042972877775, "grad_norm": 1.5371515289870878, "learning_rate": 1.4291788018478215e-05, "loss": 0.8006, "step": 2683 }, { "epoch": 0.3781613244100035, "grad_norm": 1.013402512558747, "learning_rate": 1.4287665625301934e-05, "loss": 0.8679, "step": 2684 }, { "epoch": 0.3783022190912293, "grad_norm": 1.0715454633218908, "learning_rate": 1.4283542339151428e-05, "loss": 0.911, "step": 2685 }, { "epoch": 0.3784431137724551, "grad_norm": 1.3082460779999496, "learning_rate": 1.4279418160885436e-05, "loss": 0.6337, "step": 2686 }, { "epoch": 0.3785840084536809, "grad_norm": 0.9241367492756379, "learning_rate": 1.4275293091362886e-05, "loss": 0.8528, "step": 2687 }, { "epoch": 0.37872490313490664, "grad_norm": 1.0739375841971093, "learning_rate": 1.4271167131442884e-05, "loss": 0.9012, "step": 2688 }, { "epoch": 0.37886579781613244, "grad_norm": 0.9703301741863104, "learning_rate": 1.4267040281984727e-05, "loss": 0.8822, "step": 2689 }, { "epoch": 0.37900669249735824, "grad_norm": 1.184633645551278, "learning_rate": 1.4262912543847898e-05, "loss": 0.4816, "step": 2690 }, { "epoch": 0.379147587178584, "grad_norm": 1.0470425590228487, "learning_rate": 1.4258783917892068e-05, "loss": 0.9168, "step": 2691 }, { "epoch": 0.3792884818598098, "grad_norm": 0.9610090946615718, "learning_rate": 1.4254654404977079e-05, "loss": 0.8806, "step": 2692 }, { "epoch": 0.3794293765410356, "grad_norm": 1.5754035821552284, "learning_rate": 1.4250524005962977e-05, "loss": 0.6247, "step": 2693 }, { "epoch": 0.37957027122226134, "grad_norm": 1.2153407891074917, "learning_rate": 1.4246392721709979e-05, "loss": 0.5307, "step": 2694 }, { "epoch": 0.37971116590348714, "grad_norm": 0.8726684646410147, "learning_rate": 1.4242260553078489e-05, "loss": 0.8726, "step": 2695 }, { "epoch": 0.37985206058471294, "grad_norm": 0.9508189399195672, "learning_rate": 1.42381275009291e-05, "loss": 0.8986, "step": 2696 }, { "epoch": 0.37999295526593874, "grad_norm": 1.182450452458229, "learning_rate": 1.4233993566122582e-05, "loss": 0.8538, "step": 2697 }, { "epoch": 0.3801338499471645, "grad_norm": 1.2365974127389725, "learning_rate": 1.4229858749519893e-05, "loss": 0.6066, "step": 2698 }, { "epoch": 0.3802747446283903, "grad_norm": 1.2667960310919084, "learning_rate": 1.4225723051982176e-05, "loss": 0.528, "step": 2699 }, { "epoch": 0.3804156393096161, "grad_norm": 0.9571816019919059, "learning_rate": 1.4221586474370752e-05, "loss": 0.8908, "step": 2700 }, { "epoch": 0.3805565339908418, "grad_norm": 0.9618489972830847, "learning_rate": 1.4217449017547134e-05, "loss": 0.8532, "step": 2701 }, { "epoch": 0.38069742867206763, "grad_norm": 1.0853423521141783, "learning_rate": 1.4213310682373007e-05, "loss": 0.8628, "step": 2702 }, { "epoch": 0.38083832335329343, "grad_norm": 0.9413820362673528, "learning_rate": 1.4209171469710252e-05, "loss": 0.9375, "step": 2703 }, { "epoch": 0.3809792180345192, "grad_norm": 1.0584191783216828, "learning_rate": 1.4205031380420918e-05, "loss": 0.8884, "step": 2704 }, { "epoch": 0.381120112715745, "grad_norm": 1.2295920262391862, "learning_rate": 1.4200890415367248e-05, "loss": 0.9452, "step": 2705 }, { "epoch": 0.3812610073969708, "grad_norm": 1.0258196502729657, "learning_rate": 1.4196748575411664e-05, "loss": 0.9593, "step": 2706 }, { "epoch": 0.3814019020781966, "grad_norm": 0.9748955606025512, "learning_rate": 1.4192605861416767e-05, "loss": 0.8548, "step": 2707 }, { "epoch": 0.3815427967594223, "grad_norm": 1.4138372904785452, "learning_rate": 1.4188462274245342e-05, "loss": 0.5444, "step": 2708 }, { "epoch": 0.3816836914406481, "grad_norm": 0.9845423784073084, "learning_rate": 1.4184317814760362e-05, "loss": 0.944, "step": 2709 }, { "epoch": 0.3818245861218739, "grad_norm": 0.9956906665161138, "learning_rate": 1.4180172483824972e-05, "loss": 0.9087, "step": 2710 }, { "epoch": 0.38196548080309967, "grad_norm": 0.9732179290820794, "learning_rate": 1.4176026282302506e-05, "loss": 0.9018, "step": 2711 }, { "epoch": 0.38210637548432547, "grad_norm": 1.3360443631600942, "learning_rate": 1.4171879211056473e-05, "loss": 0.5457, "step": 2712 }, { "epoch": 0.38224727016555127, "grad_norm": 1.2380896686052711, "learning_rate": 1.416773127095057e-05, "loss": 0.5089, "step": 2713 }, { "epoch": 0.382388164846777, "grad_norm": 0.9556758947041227, "learning_rate": 1.4163582462848666e-05, "loss": 0.8596, "step": 2714 }, { "epoch": 0.3825290595280028, "grad_norm": 1.4576415868102723, "learning_rate": 1.4159432787614815e-05, "loss": 0.5978, "step": 2715 }, { "epoch": 0.3826699542092286, "grad_norm": 1.4690401762073495, "learning_rate": 1.415528224611326e-05, "loss": 0.7945, "step": 2716 }, { "epoch": 0.38281084889045436, "grad_norm": 0.9368174948017802, "learning_rate": 1.4151130839208414e-05, "loss": 0.932, "step": 2717 }, { "epoch": 0.38295174357168016, "grad_norm": 0.9140359408979827, "learning_rate": 1.4146978567764868e-05, "loss": 0.9021, "step": 2718 }, { "epoch": 0.38309263825290596, "grad_norm": 0.9392759619682749, "learning_rate": 1.4142825432647405e-05, "loss": 0.9047, "step": 2719 }, { "epoch": 0.38323353293413176, "grad_norm": 1.3659296552359226, "learning_rate": 1.4138671434720976e-05, "loss": 0.6309, "step": 2720 }, { "epoch": 0.3833744276153575, "grad_norm": 1.0578244775578045, "learning_rate": 1.4134516574850721e-05, "loss": 0.9369, "step": 2721 }, { "epoch": 0.3835153222965833, "grad_norm": 1.0765175253771344, "learning_rate": 1.4130360853901956e-05, "loss": 0.9573, "step": 2722 }, { "epoch": 0.3836562169778091, "grad_norm": 0.9862409910867952, "learning_rate": 1.412620427274017e-05, "loss": 0.9117, "step": 2723 }, { "epoch": 0.38379711165903485, "grad_norm": 1.2694205005959913, "learning_rate": 1.4122046832231042e-05, "loss": 0.6603, "step": 2724 }, { "epoch": 0.38393800634026065, "grad_norm": 1.2191431465979168, "learning_rate": 1.4117888533240423e-05, "loss": 0.5611, "step": 2725 }, { "epoch": 0.38407890102148645, "grad_norm": 0.9503485610981477, "learning_rate": 1.4113729376634341e-05, "loss": 0.8656, "step": 2726 }, { "epoch": 0.3842197957027122, "grad_norm": 1.3957884034059989, "learning_rate": 1.4109569363279014e-05, "loss": 0.5752, "step": 2727 }, { "epoch": 0.384360690383938, "grad_norm": 1.0723430955401256, "learning_rate": 1.4105408494040824e-05, "loss": 0.8722, "step": 2728 }, { "epoch": 0.3845015850651638, "grad_norm": 1.0082409499633467, "learning_rate": 1.4101246769786342e-05, "loss": 0.9191, "step": 2729 }, { "epoch": 0.3846424797463896, "grad_norm": 0.9996789197261177, "learning_rate": 1.4097084191382306e-05, "loss": 0.8848, "step": 2730 }, { "epoch": 0.38478337442761534, "grad_norm": 0.9599608238591734, "learning_rate": 1.4092920759695648e-05, "loss": 0.8859, "step": 2731 }, { "epoch": 0.38492426910884114, "grad_norm": 1.125181319242714, "learning_rate": 1.4088756475593466e-05, "loss": 0.9109, "step": 2732 }, { "epoch": 0.38506516379006694, "grad_norm": 1.2911076422769328, "learning_rate": 1.4084591339943032e-05, "loss": 0.5545, "step": 2733 }, { "epoch": 0.3852060584712927, "grad_norm": 1.0640975593777793, "learning_rate": 1.408042535361181e-05, "loss": 0.9334, "step": 2734 }, { "epoch": 0.3853469531525185, "grad_norm": 1.0242428723473072, "learning_rate": 1.4076258517467425e-05, "loss": 0.9136, "step": 2735 }, { "epoch": 0.3854878478337443, "grad_norm": 1.08573080176591, "learning_rate": 1.4072090832377691e-05, "loss": 0.9102, "step": 2736 }, { "epoch": 0.38562874251497004, "grad_norm": 1.0369427802083673, "learning_rate": 1.4067922299210597e-05, "loss": 0.8848, "step": 2737 }, { "epoch": 0.38576963719619584, "grad_norm": 1.478065040822456, "learning_rate": 1.40637529188343e-05, "loss": 0.6779, "step": 2738 }, { "epoch": 0.38591053187742164, "grad_norm": 1.504316545474564, "learning_rate": 1.4059582692117139e-05, "loss": 0.5467, "step": 2739 }, { "epoch": 0.38605142655864744, "grad_norm": 1.480389753332072, "learning_rate": 1.4055411619927637e-05, "loss": 0.4702, "step": 2740 }, { "epoch": 0.3861923212398732, "grad_norm": 1.1848534960088453, "learning_rate": 1.405123970313448e-05, "loss": 0.9495, "step": 2741 }, { "epoch": 0.386333215921099, "grad_norm": 1.2232881808421334, "learning_rate": 1.4047066942606538e-05, "loss": 0.4652, "step": 2742 }, { "epoch": 0.3864741106023248, "grad_norm": 1.0224606424670255, "learning_rate": 1.4042893339212853e-05, "loss": 0.8758, "step": 2743 }, { "epoch": 0.38661500528355053, "grad_norm": 1.0805874838063423, "learning_rate": 1.4038718893822643e-05, "loss": 0.4192, "step": 2744 }, { "epoch": 0.38675589996477633, "grad_norm": 1.1070350785833156, "learning_rate": 1.4034543607305305e-05, "loss": 0.8352, "step": 2745 }, { "epoch": 0.38689679464600213, "grad_norm": 0.9457006024340259, "learning_rate": 1.4030367480530407e-05, "loss": 0.9137, "step": 2746 }, { "epoch": 0.3870376893272279, "grad_norm": 1.2280042643758016, "learning_rate": 1.4026190514367695e-05, "loss": 0.8801, "step": 2747 }, { "epoch": 0.3871785840084537, "grad_norm": 1.1515365326237714, "learning_rate": 1.4022012709687085e-05, "loss": 0.4997, "step": 2748 }, { "epoch": 0.3873194786896795, "grad_norm": 0.998870140236404, "learning_rate": 1.4017834067358673e-05, "loss": 0.867, "step": 2749 }, { "epoch": 0.3874603733709053, "grad_norm": 0.9663765963955361, "learning_rate": 1.4013654588252732e-05, "loss": 0.948, "step": 2750 }, { "epoch": 0.387601268052131, "grad_norm": 0.9241566158379367, "learning_rate": 1.4009474273239696e-05, "loss": 0.8466, "step": 2751 }, { "epoch": 0.3877421627333568, "grad_norm": 0.9702773076787898, "learning_rate": 1.4005293123190189e-05, "loss": 0.8778, "step": 2752 }, { "epoch": 0.3878830574145826, "grad_norm": 1.0352691484440977, "learning_rate": 1.4001111138974997e-05, "loss": 0.8957, "step": 2753 }, { "epoch": 0.38802395209580837, "grad_norm": 1.0117928708154662, "learning_rate": 1.3996928321465085e-05, "loss": 0.8612, "step": 2754 }, { "epoch": 0.38816484677703417, "grad_norm": 0.9965848528153688, "learning_rate": 1.3992744671531593e-05, "loss": 0.9223, "step": 2755 }, { "epoch": 0.38830574145825997, "grad_norm": 1.0458524367470634, "learning_rate": 1.398856019004583e-05, "loss": 0.9074, "step": 2756 }, { "epoch": 0.3884466361394857, "grad_norm": 1.2167601908286307, "learning_rate": 1.3984374877879283e-05, "loss": 0.8516, "step": 2757 }, { "epoch": 0.3885875308207115, "grad_norm": 1.0764979112634219, "learning_rate": 1.3980188735903608e-05, "loss": 0.9159, "step": 2758 }, { "epoch": 0.3887284255019373, "grad_norm": 1.1149227035582057, "learning_rate": 1.3976001764990634e-05, "loss": 0.9593, "step": 2759 }, { "epoch": 0.38886932018316306, "grad_norm": 0.9583264350853887, "learning_rate": 1.3971813966012368e-05, "loss": 0.8851, "step": 2760 }, { "epoch": 0.38901021486438886, "grad_norm": 1.010157311314016, "learning_rate": 1.3967625339840982e-05, "loss": 0.8858, "step": 2761 }, { "epoch": 0.38915110954561466, "grad_norm": 1.0269817290082737, "learning_rate": 1.3963435887348824e-05, "loss": 0.8833, "step": 2762 }, { "epoch": 0.38929200422684046, "grad_norm": 0.9356673104236449, "learning_rate": 1.3959245609408416e-05, "loss": 0.8703, "step": 2763 }, { "epoch": 0.3894328989080662, "grad_norm": 1.3281023354261021, "learning_rate": 1.3955054506892448e-05, "loss": 0.4784, "step": 2764 }, { "epoch": 0.389573793589292, "grad_norm": 1.1505340792900758, "learning_rate": 1.3950862580673782e-05, "loss": 0.9426, "step": 2765 }, { "epoch": 0.3897146882705178, "grad_norm": 0.9590946300584678, "learning_rate": 1.3946669831625455e-05, "loss": 0.8999, "step": 2766 }, { "epoch": 0.38985558295174355, "grad_norm": 0.9140449524438581, "learning_rate": 1.3942476260620672e-05, "loss": 0.917, "step": 2767 }, { "epoch": 0.38999647763296935, "grad_norm": 1.0500712772680714, "learning_rate": 1.3938281868532813e-05, "loss": 0.8817, "step": 2768 }, { "epoch": 0.39013737231419515, "grad_norm": 0.9301511207719211, "learning_rate": 1.3934086656235424e-05, "loss": 0.931, "step": 2769 }, { "epoch": 0.3902782669954209, "grad_norm": 1.0406489278786955, "learning_rate": 1.3929890624602225e-05, "loss": 0.8995, "step": 2770 }, { "epoch": 0.3904191616766467, "grad_norm": 1.5768847245205402, "learning_rate": 1.3925693774507107e-05, "loss": 0.5605, "step": 2771 }, { "epoch": 0.3905600563578725, "grad_norm": 1.0037557351536828, "learning_rate": 1.392149610682413e-05, "loss": 0.8789, "step": 2772 }, { "epoch": 0.3907009510390983, "grad_norm": 1.212483389704413, "learning_rate": 1.3917297622427526e-05, "loss": 0.4823, "step": 2773 }, { "epoch": 0.39084184572032404, "grad_norm": 1.2598254516365601, "learning_rate": 1.3913098322191692e-05, "loss": 0.4663, "step": 2774 }, { "epoch": 0.39098274040154984, "grad_norm": 1.1044867221207186, "learning_rate": 1.39088982069912e-05, "loss": 0.8869, "step": 2775 }, { "epoch": 0.39112363508277564, "grad_norm": 1.3156205822773293, "learning_rate": 1.3904697277700795e-05, "loss": 0.525, "step": 2776 }, { "epoch": 0.3912645297640014, "grad_norm": 1.0227420061740455, "learning_rate": 1.3900495535195379e-05, "loss": 0.8646, "step": 2777 }, { "epoch": 0.3914054244452272, "grad_norm": 1.0211223722806728, "learning_rate": 1.3896292980350042e-05, "loss": 0.9037, "step": 2778 }, { "epoch": 0.391546319126453, "grad_norm": 1.2593973660447406, "learning_rate": 1.389208961404002e-05, "loss": 0.537, "step": 2779 }, { "epoch": 0.39168721380767874, "grad_norm": 1.2674138040330645, "learning_rate": 1.388788543714074e-05, "loss": 0.9037, "step": 2780 }, { "epoch": 0.39182810848890454, "grad_norm": 1.3833950315602985, "learning_rate": 1.3883680450527788e-05, "loss": 0.5479, "step": 2781 }, { "epoch": 0.39196900317013034, "grad_norm": 1.3851658277282506, "learning_rate": 1.3879474655076911e-05, "loss": 0.5643, "step": 2782 }, { "epoch": 0.39210989785135614, "grad_norm": 1.3360164924068294, "learning_rate": 1.3875268051664041e-05, "loss": 0.4864, "step": 2783 }, { "epoch": 0.3922507925325819, "grad_norm": 1.0163959864321404, "learning_rate": 1.3871060641165266e-05, "loss": 0.9196, "step": 2784 }, { "epoch": 0.3923916872138077, "grad_norm": 1.0243714200661913, "learning_rate": 1.3866852424456848e-05, "loss": 0.9092, "step": 2785 }, { "epoch": 0.3925325818950335, "grad_norm": 1.0154068436025745, "learning_rate": 1.3862643402415212e-05, "loss": 0.8717, "step": 2786 }, { "epoch": 0.39267347657625923, "grad_norm": 1.1057700795749312, "learning_rate": 1.3858433575916953e-05, "loss": 0.9055, "step": 2787 }, { "epoch": 0.39281437125748503, "grad_norm": 1.343807350232464, "learning_rate": 1.3854222945838838e-05, "loss": 0.546, "step": 2788 }, { "epoch": 0.39295526593871083, "grad_norm": 1.2636462949186444, "learning_rate": 1.3850011513057795e-05, "loss": 0.5605, "step": 2789 }, { "epoch": 0.3930961606199366, "grad_norm": 1.0279890960421671, "learning_rate": 1.384579927845092e-05, "loss": 0.9737, "step": 2790 }, { "epoch": 0.3932370553011624, "grad_norm": 0.9343552332638719, "learning_rate": 1.384158624289548e-05, "loss": 0.8828, "step": 2791 }, { "epoch": 0.3933779499823882, "grad_norm": 1.2761215308918343, "learning_rate": 1.3837372407268904e-05, "loss": 0.567, "step": 2792 }, { "epoch": 0.393518844663614, "grad_norm": 1.0086107842139804, "learning_rate": 1.3833157772448793e-05, "loss": 0.8466, "step": 2793 }, { "epoch": 0.3936597393448397, "grad_norm": 0.9069580700830964, "learning_rate": 1.3828942339312907e-05, "loss": 0.8965, "step": 2794 }, { "epoch": 0.3938006340260655, "grad_norm": 0.9611570908335394, "learning_rate": 1.3824726108739184e-05, "loss": 0.8551, "step": 2795 }, { "epoch": 0.3939415287072913, "grad_norm": 0.9541690472988509, "learning_rate": 1.3820509081605713e-05, "loss": 0.9481, "step": 2796 }, { "epoch": 0.39408242338851707, "grad_norm": 0.9959558377493493, "learning_rate": 1.3816291258790762e-05, "loss": 0.8692, "step": 2797 }, { "epoch": 0.39422331806974287, "grad_norm": 0.948121192481176, "learning_rate": 1.3812072641172753e-05, "loss": 0.8873, "step": 2798 }, { "epoch": 0.39436421275096867, "grad_norm": 1.4721105075978902, "learning_rate": 1.3807853229630288e-05, "loss": 0.5686, "step": 2799 }, { "epoch": 0.3945051074321944, "grad_norm": 1.0350897837008042, "learning_rate": 1.380363302504212e-05, "loss": 0.8957, "step": 2800 }, { "epoch": 0.3946460021134202, "grad_norm": 1.0697097129826667, "learning_rate": 1.3799412028287175e-05, "loss": 0.8538, "step": 2801 }, { "epoch": 0.394786896794646, "grad_norm": 1.0988271102957987, "learning_rate": 1.3795190240244547e-05, "loss": 0.5513, "step": 2802 }, { "epoch": 0.39492779147587176, "grad_norm": 1.2599747569446658, "learning_rate": 1.3790967661793481e-05, "loss": 0.4975, "step": 2803 }, { "epoch": 0.39506868615709756, "grad_norm": 1.0072288455327731, "learning_rate": 1.3786744293813405e-05, "loss": 0.892, "step": 2804 }, { "epoch": 0.39520958083832336, "grad_norm": 1.0101724093276403, "learning_rate": 1.3782520137183896e-05, "loss": 0.8806, "step": 2805 }, { "epoch": 0.39535047551954916, "grad_norm": 1.1605995147987516, "learning_rate": 1.3778295192784705e-05, "loss": 0.5327, "step": 2806 }, { "epoch": 0.3954913702007749, "grad_norm": 1.110554172522432, "learning_rate": 1.3774069461495743e-05, "loss": 0.9271, "step": 2807 }, { "epoch": 0.3956322648820007, "grad_norm": 1.00561115513352, "learning_rate": 1.3769842944197082e-05, "loss": 0.8254, "step": 2808 }, { "epoch": 0.3957731595632265, "grad_norm": 1.0549112922815669, "learning_rate": 1.3765615641768966e-05, "loss": 0.9261, "step": 2809 }, { "epoch": 0.39591405424445225, "grad_norm": 0.9432227672371356, "learning_rate": 1.3761387555091795e-05, "loss": 0.883, "step": 2810 }, { "epoch": 0.39605494892567805, "grad_norm": 1.1695550018894385, "learning_rate": 1.3757158685046132e-05, "loss": 0.4264, "step": 2811 }, { "epoch": 0.39619584360690385, "grad_norm": 0.9840833708777257, "learning_rate": 1.3752929032512717e-05, "loss": 0.9028, "step": 2812 }, { "epoch": 0.3963367382881296, "grad_norm": 1.0377184633925345, "learning_rate": 1.3748698598372428e-05, "loss": 0.8704, "step": 2813 }, { "epoch": 0.3964776329693554, "grad_norm": 1.1298463310762183, "learning_rate": 1.374446738350633e-05, "loss": 0.9362, "step": 2814 }, { "epoch": 0.3966185276505812, "grad_norm": 1.092555593942672, "learning_rate": 1.3740235388795633e-05, "loss": 0.4422, "step": 2815 }, { "epoch": 0.396759422331807, "grad_norm": 1.1554845837002312, "learning_rate": 1.3736002615121722e-05, "loss": 0.9351, "step": 2816 }, { "epoch": 0.39690031701303274, "grad_norm": 0.9558904764086011, "learning_rate": 1.373176906336614e-05, "loss": 0.8878, "step": 2817 }, { "epoch": 0.39704121169425854, "grad_norm": 0.9414000596554676, "learning_rate": 1.3727534734410588e-05, "loss": 0.8818, "step": 2818 }, { "epoch": 0.39718210637548435, "grad_norm": 0.9710738946949171, "learning_rate": 1.3723299629136935e-05, "loss": 0.9414, "step": 2819 }, { "epoch": 0.3973230010567101, "grad_norm": 0.9950883058667345, "learning_rate": 1.3719063748427203e-05, "loss": 0.8921, "step": 2820 }, { "epoch": 0.3974638957379359, "grad_norm": 0.9279388381406066, "learning_rate": 1.3714827093163585e-05, "loss": 0.9273, "step": 2821 }, { "epoch": 0.3976047904191617, "grad_norm": 0.9153686910513034, "learning_rate": 1.3710589664228435e-05, "loss": 0.8838, "step": 2822 }, { "epoch": 0.39774568510038744, "grad_norm": 1.3045419593029974, "learning_rate": 1.3706351462504259e-05, "loss": 0.5142, "step": 2823 }, { "epoch": 0.39788657978161324, "grad_norm": 1.2426455843382633, "learning_rate": 1.370211248887373e-05, "loss": 0.4438, "step": 2824 }, { "epoch": 0.39802747446283904, "grad_norm": 0.9679785335486069, "learning_rate": 1.369787274421969e-05, "loss": 0.8766, "step": 2825 }, { "epoch": 0.39816836914406484, "grad_norm": 1.3660029983706274, "learning_rate": 1.3693632229425118e-05, "loss": 0.4412, "step": 2826 }, { "epoch": 0.3983092638252906, "grad_norm": 1.0369757065268144, "learning_rate": 1.368939094537318e-05, "loss": 0.9292, "step": 2827 }, { "epoch": 0.3984501585065164, "grad_norm": 1.1146349279759098, "learning_rate": 1.3685148892947187e-05, "loss": 0.8278, "step": 2828 }, { "epoch": 0.3985910531877422, "grad_norm": 0.9324988680085889, "learning_rate": 1.3680906073030611e-05, "loss": 0.8546, "step": 2829 }, { "epoch": 0.39873194786896793, "grad_norm": 1.0273566177982687, "learning_rate": 1.367666248650709e-05, "loss": 0.8929, "step": 2830 }, { "epoch": 0.39887284255019373, "grad_norm": 1.0444534611494332, "learning_rate": 1.3672418134260417e-05, "loss": 0.8892, "step": 2831 }, { "epoch": 0.39901373723141953, "grad_norm": 0.9498318601195002, "learning_rate": 1.3668173017174547e-05, "loss": 0.8845, "step": 2832 }, { "epoch": 0.3991546319126453, "grad_norm": 1.4784429460774111, "learning_rate": 1.366392713613359e-05, "loss": 0.6199, "step": 2833 }, { "epoch": 0.3992955265938711, "grad_norm": 0.9338845369588314, "learning_rate": 1.3659680492021816e-05, "loss": 0.8901, "step": 2834 }, { "epoch": 0.3994364212750969, "grad_norm": 1.4760078956365543, "learning_rate": 1.3655433085723665e-05, "loss": 0.4616, "step": 2835 }, { "epoch": 0.3995773159563227, "grad_norm": 1.0158835958572083, "learning_rate": 1.3651184918123715e-05, "loss": 0.8552, "step": 2836 }, { "epoch": 0.3997182106375484, "grad_norm": 0.9482650922122267, "learning_rate": 1.3646935990106723e-05, "loss": 0.8596, "step": 2837 }, { "epoch": 0.3998591053187742, "grad_norm": 1.2625392427805624, "learning_rate": 1.3642686302557593e-05, "loss": 0.4968, "step": 2838 }, { "epoch": 0.4, "grad_norm": 1.2304678935612927, "learning_rate": 1.3638435856361384e-05, "loss": 0.4496, "step": 2839 }, { "epoch": 0.40014089468122577, "grad_norm": 0.9105625877842434, "learning_rate": 1.3634184652403328e-05, "loss": 0.866, "step": 2840 }, { "epoch": 0.40028178936245157, "grad_norm": 1.3591102180393448, "learning_rate": 1.3629932691568799e-05, "loss": 0.6645, "step": 2841 }, { "epoch": 0.40042268404367737, "grad_norm": 1.278765089627431, "learning_rate": 1.3625679974743337e-05, "loss": 0.5054, "step": 2842 }, { "epoch": 0.4005635787249031, "grad_norm": 1.3185602071426303, "learning_rate": 1.362142650281264e-05, "loss": 0.4923, "step": 2843 }, { "epoch": 0.4007044734061289, "grad_norm": 0.9670163299518546, "learning_rate": 1.3617172276662556e-05, "loss": 0.9072, "step": 2844 }, { "epoch": 0.4008453680873547, "grad_norm": 1.2586817727806021, "learning_rate": 1.36129172971791e-05, "loss": 0.5491, "step": 2845 }, { "epoch": 0.40098626276858046, "grad_norm": 0.9808490371159795, "learning_rate": 1.3608661565248436e-05, "loss": 0.8539, "step": 2846 }, { "epoch": 0.40112715744980626, "grad_norm": 1.2719382323422512, "learning_rate": 1.3604405081756884e-05, "loss": 0.5184, "step": 2847 }, { "epoch": 0.40126805213103206, "grad_norm": 1.2285169320948546, "learning_rate": 1.3600147847590933e-05, "loss": 0.5082, "step": 2848 }, { "epoch": 0.40140894681225786, "grad_norm": 1.452276359731451, "learning_rate": 1.359588986363721e-05, "loss": 0.5253, "step": 2849 }, { "epoch": 0.4015498414934836, "grad_norm": 1.3121777004493973, "learning_rate": 1.3591631130782516e-05, "loss": 0.8676, "step": 2850 }, { "epoch": 0.4016907361747094, "grad_norm": 1.0341724429437424, "learning_rate": 1.3587371649913795e-05, "loss": 0.888, "step": 2851 }, { "epoch": 0.4018316308559352, "grad_norm": 1.0798336178428798, "learning_rate": 1.3583111421918147e-05, "loss": 0.8684, "step": 2852 }, { "epoch": 0.40197252553716095, "grad_norm": 1.2013140539970237, "learning_rate": 1.3578850447682842e-05, "loss": 0.6078, "step": 2853 }, { "epoch": 0.40211342021838675, "grad_norm": 0.9276622625268469, "learning_rate": 1.3574588728095285e-05, "loss": 0.8469, "step": 2854 }, { "epoch": 0.40225431489961255, "grad_norm": 0.9982355396389425, "learning_rate": 1.3570326264043054e-05, "loss": 0.9016, "step": 2855 }, { "epoch": 0.4023952095808383, "grad_norm": 1.0038643370718885, "learning_rate": 1.3566063056413874e-05, "loss": 0.841, "step": 2856 }, { "epoch": 0.4025361042620641, "grad_norm": 1.540230963900897, "learning_rate": 1.3561799106095622e-05, "loss": 0.4863, "step": 2857 }, { "epoch": 0.4026769989432899, "grad_norm": 1.3327526013624107, "learning_rate": 1.3557534413976336e-05, "loss": 0.5614, "step": 2858 }, { "epoch": 0.4028178936245157, "grad_norm": 0.8951827182324639, "learning_rate": 1.3553268980944201e-05, "loss": 0.8827, "step": 2859 }, { "epoch": 0.40295878830574144, "grad_norm": 1.2314393666441028, "learning_rate": 1.3549002807887566e-05, "loss": 0.5709, "step": 2860 }, { "epoch": 0.40309968298696724, "grad_norm": 0.9432904726421633, "learning_rate": 1.3544735895694929e-05, "loss": 0.9013, "step": 2861 }, { "epoch": 0.40324057766819305, "grad_norm": 0.9934863242919104, "learning_rate": 1.3540468245254938e-05, "loss": 0.9581, "step": 2862 }, { "epoch": 0.4033814723494188, "grad_norm": 1.3295975277750078, "learning_rate": 1.3536199857456402e-05, "loss": 0.622, "step": 2863 }, { "epoch": 0.4035223670306446, "grad_norm": 0.9903419908666273, "learning_rate": 1.3531930733188279e-05, "loss": 0.9129, "step": 2864 }, { "epoch": 0.4036632617118704, "grad_norm": 1.0472224763743565, "learning_rate": 1.3527660873339681e-05, "loss": 0.8066, "step": 2865 }, { "epoch": 0.40380415639309614, "grad_norm": 1.119216119309583, "learning_rate": 1.3523390278799879e-05, "loss": 0.9471, "step": 2866 }, { "epoch": 0.40394505107432194, "grad_norm": 0.9880312713822024, "learning_rate": 1.3519118950458286e-05, "loss": 0.865, "step": 2867 }, { "epoch": 0.40408594575554774, "grad_norm": 0.9912882925716179, "learning_rate": 1.3514846889204474e-05, "loss": 0.9372, "step": 2868 }, { "epoch": 0.40422684043677354, "grad_norm": 0.9434188839832286, "learning_rate": 1.3510574095928168e-05, "loss": 0.9464, "step": 2869 }, { "epoch": 0.4043677351179993, "grad_norm": 1.0511492249612728, "learning_rate": 1.3506300571519248e-05, "loss": 0.9355, "step": 2870 }, { "epoch": 0.4045086297992251, "grad_norm": 1.294030288886367, "learning_rate": 1.3502026316867743e-05, "loss": 0.4707, "step": 2871 }, { "epoch": 0.4046495244804509, "grad_norm": 1.4126421925068853, "learning_rate": 1.3497751332863827e-05, "loss": 0.5206, "step": 2872 }, { "epoch": 0.40479041916167663, "grad_norm": 0.9616114357823274, "learning_rate": 1.3493475620397843e-05, "loss": 0.8966, "step": 2873 }, { "epoch": 0.40493131384290243, "grad_norm": 1.3193029763384299, "learning_rate": 1.3489199180360266e-05, "loss": 0.4947, "step": 2874 }, { "epoch": 0.40507220852412823, "grad_norm": 0.9935313071523557, "learning_rate": 1.3484922013641739e-05, "loss": 0.8625, "step": 2875 }, { "epoch": 0.405213103205354, "grad_norm": 1.4351977121752861, "learning_rate": 1.3480644121133052e-05, "loss": 0.8797, "step": 2876 }, { "epoch": 0.4053539978865798, "grad_norm": 1.011368933531061, "learning_rate": 1.3476365503725135e-05, "loss": 0.8606, "step": 2877 }, { "epoch": 0.4054948925678056, "grad_norm": 1.3344981197506076, "learning_rate": 1.3472086162309083e-05, "loss": 0.6044, "step": 2878 }, { "epoch": 0.4056357872490314, "grad_norm": 1.0839375889205123, "learning_rate": 1.346780609777614e-05, "loss": 0.9747, "step": 2879 }, { "epoch": 0.4057766819302571, "grad_norm": 1.0021500600797826, "learning_rate": 1.3463525311017689e-05, "loss": 0.895, "step": 2880 }, { "epoch": 0.4059175766114829, "grad_norm": 0.9964408629891148, "learning_rate": 1.3459243802925278e-05, "loss": 0.8821, "step": 2881 }, { "epoch": 0.4060584712927087, "grad_norm": 0.9439985639239167, "learning_rate": 1.3454961574390595e-05, "loss": 0.8988, "step": 2882 }, { "epoch": 0.40619936597393447, "grad_norm": 1.3551568612686917, "learning_rate": 1.3450678626305485e-05, "loss": 0.62, "step": 2883 }, { "epoch": 0.40634026065516027, "grad_norm": 1.026983975268147, "learning_rate": 1.344639495956194e-05, "loss": 0.8836, "step": 2884 }, { "epoch": 0.40648115533638607, "grad_norm": 1.4475179977612267, "learning_rate": 1.3442110575052096e-05, "loss": 0.5387, "step": 2885 }, { "epoch": 0.4066220500176118, "grad_norm": 1.204915459202544, "learning_rate": 1.343782547366825e-05, "loss": 0.46, "step": 2886 }, { "epoch": 0.4067629446988376, "grad_norm": 1.0443339538987093, "learning_rate": 1.3433539656302838e-05, "loss": 0.895, "step": 2887 }, { "epoch": 0.4069038393800634, "grad_norm": 1.0988637198949585, "learning_rate": 1.3429253123848452e-05, "loss": 0.9126, "step": 2888 }, { "epoch": 0.40704473406128916, "grad_norm": 1.0474792739321812, "learning_rate": 1.3424965877197829e-05, "loss": 0.9157, "step": 2889 }, { "epoch": 0.40718562874251496, "grad_norm": 0.9104662885378926, "learning_rate": 1.3420677917243856e-05, "loss": 0.8385, "step": 2890 }, { "epoch": 0.40732652342374076, "grad_norm": 1.3932349974049845, "learning_rate": 1.3416389244879567e-05, "loss": 0.5933, "step": 2891 }, { "epoch": 0.40746741810496656, "grad_norm": 1.3664500861615905, "learning_rate": 1.3412099860998151e-05, "loss": 0.537, "step": 2892 }, { "epoch": 0.4076083127861923, "grad_norm": 0.982582863687689, "learning_rate": 1.3407809766492937e-05, "loss": 0.8767, "step": 2893 }, { "epoch": 0.4077492074674181, "grad_norm": 0.97728937604874, "learning_rate": 1.3403518962257403e-05, "loss": 0.904, "step": 2894 }, { "epoch": 0.4078901021486439, "grad_norm": 1.0212225180131655, "learning_rate": 1.3399227449185178e-05, "loss": 0.8571, "step": 2895 }, { "epoch": 0.40803099682986965, "grad_norm": 1.168676899553815, "learning_rate": 1.3394935228170039e-05, "loss": 0.4475, "step": 2896 }, { "epoch": 0.40817189151109545, "grad_norm": 1.443993799209864, "learning_rate": 1.339064230010591e-05, "loss": 0.6144, "step": 2897 }, { "epoch": 0.40831278619232125, "grad_norm": 1.0299076814919184, "learning_rate": 1.3386348665886854e-05, "loss": 0.9119, "step": 2898 }, { "epoch": 0.408453680873547, "grad_norm": 1.2502561399681462, "learning_rate": 1.3382054326407098e-05, "loss": 0.4589, "step": 2899 }, { "epoch": 0.4085945755547728, "grad_norm": 0.9477554572629667, "learning_rate": 1.3377759282561003e-05, "loss": 0.927, "step": 2900 }, { "epoch": 0.4087354702359986, "grad_norm": 1.4125079198512256, "learning_rate": 1.3373463535243074e-05, "loss": 0.5241, "step": 2901 }, { "epoch": 0.4088763649172244, "grad_norm": 0.8888966516040115, "learning_rate": 1.3369167085347979e-05, "loss": 0.8674, "step": 2902 }, { "epoch": 0.40901725959845014, "grad_norm": 0.982819436018762, "learning_rate": 1.3364869933770512e-05, "loss": 0.8674, "step": 2903 }, { "epoch": 0.40915815427967595, "grad_norm": 0.8897199125100719, "learning_rate": 1.336057208140563e-05, "loss": 0.9356, "step": 2904 }, { "epoch": 0.40929904896090175, "grad_norm": 1.084656396367269, "learning_rate": 1.335627352914842e-05, "loss": 0.9034, "step": 2905 }, { "epoch": 0.4094399436421275, "grad_norm": 1.3580226466416876, "learning_rate": 1.3351974277894131e-05, "loss": 0.5716, "step": 2906 }, { "epoch": 0.4095808383233533, "grad_norm": 1.2775703536494991, "learning_rate": 1.3347674328538149e-05, "loss": 0.5007, "step": 2907 }, { "epoch": 0.4097217330045791, "grad_norm": 1.2377235360618217, "learning_rate": 1.3343373681976004e-05, "loss": 0.484, "step": 2908 }, { "epoch": 0.40986262768580484, "grad_norm": 1.0417556764833749, "learning_rate": 1.3339072339103377e-05, "loss": 0.9105, "step": 2909 }, { "epoch": 0.41000352236703064, "grad_norm": 0.9444462358197159, "learning_rate": 1.3334770300816088e-05, "loss": 0.887, "step": 2910 }, { "epoch": 0.41014441704825644, "grad_norm": 1.0113794154214473, "learning_rate": 1.3330467568010107e-05, "loss": 0.918, "step": 2911 }, { "epoch": 0.41028531172948224, "grad_norm": 0.9483168290280449, "learning_rate": 1.3326164141581543e-05, "loss": 0.8502, "step": 2912 }, { "epoch": 0.410426206410708, "grad_norm": 1.3327632562973868, "learning_rate": 1.3321860022426653e-05, "loss": 0.5322, "step": 2913 }, { "epoch": 0.4105671010919338, "grad_norm": 1.4219602202062065, "learning_rate": 1.331755521144184e-05, "loss": 0.5463, "step": 2914 }, { "epoch": 0.4107079957731596, "grad_norm": 1.0007616190626323, "learning_rate": 1.3313249709523652e-05, "loss": 0.9402, "step": 2915 }, { "epoch": 0.41084889045438533, "grad_norm": 1.050387601618243, "learning_rate": 1.330894351756877e-05, "loss": 0.8773, "step": 2916 }, { "epoch": 0.41098978513561113, "grad_norm": 0.9613263103043179, "learning_rate": 1.3304636636474033e-05, "loss": 0.8762, "step": 2917 }, { "epoch": 0.41113067981683693, "grad_norm": 1.12448802524892, "learning_rate": 1.3300329067136416e-05, "loss": 0.8833, "step": 2918 }, { "epoch": 0.4112715744980627, "grad_norm": 1.0782693085064508, "learning_rate": 1.3296020810453034e-05, "loss": 0.9026, "step": 2919 }, { "epoch": 0.4114124691792885, "grad_norm": 1.200007891842742, "learning_rate": 1.3291711867321157e-05, "loss": 0.4526, "step": 2920 }, { "epoch": 0.4115533638605143, "grad_norm": 1.1396230755285084, "learning_rate": 1.3287402238638185e-05, "loss": 0.9285, "step": 2921 }, { "epoch": 0.4116942585417401, "grad_norm": 1.0702795012805861, "learning_rate": 1.3283091925301668e-05, "loss": 0.9412, "step": 2922 }, { "epoch": 0.4118351532229658, "grad_norm": 1.2788149106288533, "learning_rate": 1.3278780928209298e-05, "loss": 0.5758, "step": 2923 }, { "epoch": 0.4119760479041916, "grad_norm": 1.0748926606368407, "learning_rate": 1.3274469248258907e-05, "loss": 0.8794, "step": 2924 }, { "epoch": 0.4121169425854174, "grad_norm": 1.2856824874171016, "learning_rate": 1.3270156886348473e-05, "loss": 0.4474, "step": 2925 }, { "epoch": 0.41225783726664317, "grad_norm": 1.1992057032598984, "learning_rate": 1.3265843843376106e-05, "loss": 0.9068, "step": 2926 }, { "epoch": 0.41239873194786897, "grad_norm": 1.016351645293174, "learning_rate": 1.3261530120240076e-05, "loss": 0.8262, "step": 2927 }, { "epoch": 0.41253962662909477, "grad_norm": 1.1350304270316616, "learning_rate": 1.3257215717838778e-05, "loss": 0.9081, "step": 2928 }, { "epoch": 0.4126805213103205, "grad_norm": 0.9931787716194581, "learning_rate": 1.3252900637070755e-05, "loss": 0.8689, "step": 2929 }, { "epoch": 0.4128214159915463, "grad_norm": 1.0828959977134156, "learning_rate": 1.3248584878834694e-05, "loss": 0.9068, "step": 2930 }, { "epoch": 0.4129623106727721, "grad_norm": 1.3259152443849538, "learning_rate": 1.3244268444029415e-05, "loss": 0.6056, "step": 2931 }, { "epoch": 0.41310320535399786, "grad_norm": 0.9846620447296233, "learning_rate": 1.3239951333553887e-05, "loss": 0.8663, "step": 2932 }, { "epoch": 0.41324410003522366, "grad_norm": 0.9443552946578181, "learning_rate": 1.3235633548307218e-05, "loss": 0.8513, "step": 2933 }, { "epoch": 0.41338499471644946, "grad_norm": 1.3082935023605353, "learning_rate": 1.3231315089188652e-05, "loss": 0.6251, "step": 2934 }, { "epoch": 0.41352588939767526, "grad_norm": 0.9231409380492529, "learning_rate": 1.3226995957097578e-05, "loss": 0.8639, "step": 2935 }, { "epoch": 0.413666784078901, "grad_norm": 1.185707680048714, "learning_rate": 1.3222676152933522e-05, "loss": 0.4561, "step": 2936 }, { "epoch": 0.4138076787601268, "grad_norm": 1.0738654398140992, "learning_rate": 1.3218355677596153e-05, "loss": 0.8851, "step": 2937 }, { "epoch": 0.4139485734413526, "grad_norm": 0.907272827907801, "learning_rate": 1.321403453198528e-05, "loss": 0.8322, "step": 2938 }, { "epoch": 0.41408946812257835, "grad_norm": 1.4440210577238963, "learning_rate": 1.3209712717000846e-05, "loss": 0.5535, "step": 2939 }, { "epoch": 0.41423036280380415, "grad_norm": 1.2829138538600202, "learning_rate": 1.320539023354294e-05, "loss": 0.6681, "step": 2940 }, { "epoch": 0.41437125748502995, "grad_norm": 0.9910299028261846, "learning_rate": 1.3201067082511791e-05, "loss": 0.8439, "step": 2941 }, { "epoch": 0.4145121521662557, "grad_norm": 1.0084965819503036, "learning_rate": 1.3196743264807756e-05, "loss": 0.8746, "step": 2942 }, { "epoch": 0.4146530468474815, "grad_norm": 1.0949456480585, "learning_rate": 1.3192418781331344e-05, "loss": 0.8188, "step": 2943 }, { "epoch": 0.4147939415287073, "grad_norm": 1.2087458503137154, "learning_rate": 1.3188093632983194e-05, "loss": 0.5002, "step": 2944 }, { "epoch": 0.4149348362099331, "grad_norm": 1.2620868667654284, "learning_rate": 1.3183767820664089e-05, "loss": 0.5133, "step": 2945 }, { "epoch": 0.41507573089115885, "grad_norm": 1.2531304186923775, "learning_rate": 1.3179441345274946e-05, "loss": 0.5442, "step": 2946 }, { "epoch": 0.41521662557238465, "grad_norm": 1.2029752380827425, "learning_rate": 1.3175114207716821e-05, "loss": 0.8699, "step": 2947 }, { "epoch": 0.41535752025361045, "grad_norm": 1.1297522588410926, "learning_rate": 1.3170786408890916e-05, "loss": 0.9263, "step": 2948 }, { "epoch": 0.4154984149348362, "grad_norm": 0.943662022379883, "learning_rate": 1.3166457949698554e-05, "loss": 0.9226, "step": 2949 }, { "epoch": 0.415639309616062, "grad_norm": 1.5248523480634546, "learning_rate": 1.316212883104121e-05, "loss": 0.6861, "step": 2950 }, { "epoch": 0.4157802042972878, "grad_norm": 0.9684202722071132, "learning_rate": 1.3157799053820492e-05, "loss": 0.925, "step": 2951 }, { "epoch": 0.41592109897851354, "grad_norm": 1.2199076685710422, "learning_rate": 1.3153468618938143e-05, "loss": 0.4779, "step": 2952 }, { "epoch": 0.41606199365973934, "grad_norm": 1.305979745141546, "learning_rate": 1.3149137527296047e-05, "loss": 0.9308, "step": 2953 }, { "epoch": 0.41620288834096514, "grad_norm": 0.9766922099742955, "learning_rate": 1.3144805779796218e-05, "loss": 0.8946, "step": 2954 }, { "epoch": 0.41634378302219094, "grad_norm": 1.0322994544197361, "learning_rate": 1.3140473377340815e-05, "loss": 0.8693, "step": 2955 }, { "epoch": 0.4164846777034167, "grad_norm": 0.9754580643169198, "learning_rate": 1.3136140320832127e-05, "loss": 0.8879, "step": 2956 }, { "epoch": 0.4166255723846425, "grad_norm": 1.0768866484453852, "learning_rate": 1.3131806611172581e-05, "loss": 0.8958, "step": 2957 }, { "epoch": 0.4167664670658683, "grad_norm": 1.3270049010556946, "learning_rate": 1.3127472249264748e-05, "loss": 0.5285, "step": 2958 }, { "epoch": 0.41690736174709403, "grad_norm": 0.9334046182519009, "learning_rate": 1.3123137236011317e-05, "loss": 0.8959, "step": 2959 }, { "epoch": 0.41704825642831983, "grad_norm": 1.0323449267469726, "learning_rate": 1.3118801572315126e-05, "loss": 0.8785, "step": 2960 }, { "epoch": 0.41718915110954563, "grad_norm": 0.9594255677399544, "learning_rate": 1.3114465259079153e-05, "loss": 0.9356, "step": 2961 }, { "epoch": 0.4173300457907714, "grad_norm": 1.1836395201139656, "learning_rate": 1.311012829720649e-05, "loss": 0.5017, "step": 2962 }, { "epoch": 0.4174709404719972, "grad_norm": 1.0409529449346526, "learning_rate": 1.3105790687600393e-05, "loss": 0.9439, "step": 2963 }, { "epoch": 0.417611835153223, "grad_norm": 1.0262979872067373, "learning_rate": 1.310145243116423e-05, "loss": 0.8827, "step": 2964 }, { "epoch": 0.4177527298344488, "grad_norm": 1.7786692687137655, "learning_rate": 1.3097113528801511e-05, "loss": 0.5256, "step": 2965 }, { "epoch": 0.4178936245156745, "grad_norm": 0.9970612161773168, "learning_rate": 1.3092773981415882e-05, "loss": 0.8771, "step": 2966 }, { "epoch": 0.4180345191969003, "grad_norm": 1.3179136815121264, "learning_rate": 1.3088433789911126e-05, "loss": 0.585, "step": 2967 }, { "epoch": 0.4181754138781261, "grad_norm": 1.0306845713071549, "learning_rate": 1.308409295519115e-05, "loss": 0.8737, "step": 2968 }, { "epoch": 0.41831630855935187, "grad_norm": 1.0037036454161257, "learning_rate": 1.3079751478160008e-05, "loss": 0.8968, "step": 2969 }, { "epoch": 0.41845720324057767, "grad_norm": 1.0902972530793962, "learning_rate": 1.3075409359721875e-05, "loss": 0.8939, "step": 2970 }, { "epoch": 0.41859809792180347, "grad_norm": 1.2100528604929506, "learning_rate": 1.307106660078107e-05, "loss": 0.4527, "step": 2971 }, { "epoch": 0.4187389926030292, "grad_norm": 1.3608891504232428, "learning_rate": 1.3066723202242045e-05, "loss": 0.4752, "step": 2972 }, { "epoch": 0.418879887284255, "grad_norm": 1.0367166387180362, "learning_rate": 1.3062379165009373e-05, "loss": 0.9111, "step": 2973 }, { "epoch": 0.4190207819654808, "grad_norm": 1.0853947872234668, "learning_rate": 1.3058034489987774e-05, "loss": 0.8348, "step": 2974 }, { "epoch": 0.41916167664670656, "grad_norm": 0.9787156860220229, "learning_rate": 1.3053689178082094e-05, "loss": 0.8183, "step": 2975 }, { "epoch": 0.41930257132793236, "grad_norm": 1.1319081362933658, "learning_rate": 1.304934323019731e-05, "loss": 0.4236, "step": 2976 }, { "epoch": 0.41944346600915816, "grad_norm": 0.974036714568953, "learning_rate": 1.3044996647238541e-05, "loss": 0.8808, "step": 2977 }, { "epoch": 0.41958436069038396, "grad_norm": 0.9885287061925842, "learning_rate": 1.3040649430111027e-05, "loss": 0.8788, "step": 2978 }, { "epoch": 0.4197252553716097, "grad_norm": 1.3699166213029097, "learning_rate": 1.3036301579720147e-05, "loss": 0.5047, "step": 2979 }, { "epoch": 0.4198661500528355, "grad_norm": 1.0410247627164586, "learning_rate": 1.3031953096971406e-05, "loss": 0.8693, "step": 2980 }, { "epoch": 0.4200070447340613, "grad_norm": 0.9769340355156907, "learning_rate": 1.3027603982770447e-05, "loss": 0.8592, "step": 2981 }, { "epoch": 0.42014793941528705, "grad_norm": 1.0673494619046768, "learning_rate": 1.3023254238023045e-05, "loss": 0.909, "step": 2982 }, { "epoch": 0.42028883409651285, "grad_norm": 1.2938626159514683, "learning_rate": 1.3018903863635094e-05, "loss": 0.5612, "step": 2983 }, { "epoch": 0.42042972877773865, "grad_norm": 1.0459670867295503, "learning_rate": 1.3014552860512639e-05, "loss": 0.8193, "step": 2984 }, { "epoch": 0.4205706234589644, "grad_norm": 0.9457737342832829, "learning_rate": 1.3010201229561838e-05, "loss": 0.8706, "step": 2985 }, { "epoch": 0.4207115181401902, "grad_norm": 0.96234966528544, "learning_rate": 1.300584897168899e-05, "loss": 0.9178, "step": 2986 }, { "epoch": 0.420852412821416, "grad_norm": 1.3226926141498956, "learning_rate": 1.3001496087800525e-05, "loss": 0.5728, "step": 2987 }, { "epoch": 0.4209933075026418, "grad_norm": 1.202779946790118, "learning_rate": 1.2997142578802991e-05, "loss": 0.6098, "step": 2988 }, { "epoch": 0.42113420218386755, "grad_norm": 1.3466044308917102, "learning_rate": 1.2992788445603082e-05, "loss": 0.5559, "step": 2989 }, { "epoch": 0.42127509686509335, "grad_norm": 1.053604445098221, "learning_rate": 1.2988433689107613e-05, "loss": 0.9229, "step": 2990 }, { "epoch": 0.42141599154631915, "grad_norm": 1.0481185111716118, "learning_rate": 1.298407831022353e-05, "loss": 0.8974, "step": 2991 }, { "epoch": 0.4215568862275449, "grad_norm": 0.9701715402594552, "learning_rate": 1.2979722309857912e-05, "loss": 0.934, "step": 2992 }, { "epoch": 0.4216977809087707, "grad_norm": 1.369271100156201, "learning_rate": 1.2975365688917965e-05, "loss": 0.5013, "step": 2993 }, { "epoch": 0.4218386755899965, "grad_norm": 0.9497705474163499, "learning_rate": 1.297100844831102e-05, "loss": 0.8783, "step": 2994 }, { "epoch": 0.42197957027122224, "grad_norm": 1.1181502019502192, "learning_rate": 1.2966650588944547e-05, "loss": 0.9375, "step": 2995 }, { "epoch": 0.42212046495244804, "grad_norm": 0.9759210678958374, "learning_rate": 1.2962292111726135e-05, "loss": 0.7513, "step": 2996 }, { "epoch": 0.42226135963367384, "grad_norm": 1.023449706329928, "learning_rate": 1.2957933017563511e-05, "loss": 0.8845, "step": 2997 }, { "epoch": 0.42240225431489964, "grad_norm": 1.3356681614964, "learning_rate": 1.2953573307364518e-05, "loss": 0.5722, "step": 2998 }, { "epoch": 0.4225431489961254, "grad_norm": 1.0264654086441611, "learning_rate": 1.2949212982037138e-05, "loss": 0.8653, "step": 2999 }, { "epoch": 0.4226840436773512, "grad_norm": 1.1748183982052225, "learning_rate": 1.2944852042489482e-05, "loss": 0.5767, "step": 3000 }, { "epoch": 0.422824938358577, "grad_norm": 1.021405154914894, "learning_rate": 1.2940490489629777e-05, "loss": 0.9342, "step": 3001 }, { "epoch": 0.42296583303980273, "grad_norm": 1.3464275996380655, "learning_rate": 1.2936128324366393e-05, "loss": 0.5052, "step": 3002 }, { "epoch": 0.42310672772102853, "grad_norm": 0.9490344447525305, "learning_rate": 1.2931765547607815e-05, "loss": 0.9051, "step": 3003 }, { "epoch": 0.42324762240225433, "grad_norm": 1.0500667852282326, "learning_rate": 1.292740216026266e-05, "loss": 0.8525, "step": 3004 }, { "epoch": 0.4233885170834801, "grad_norm": 1.0550006795953975, "learning_rate": 1.2923038163239678e-05, "loss": 0.9113, "step": 3005 }, { "epoch": 0.4235294117647059, "grad_norm": 0.9693833754660799, "learning_rate": 1.2918673557447734e-05, "loss": 0.913, "step": 3006 }, { "epoch": 0.4236703064459317, "grad_norm": 1.2913712087914548, "learning_rate": 1.2914308343795828e-05, "loss": 0.498, "step": 3007 }, { "epoch": 0.4238112011271575, "grad_norm": 1.3472850406490444, "learning_rate": 1.2909942523193086e-05, "loss": 0.5806, "step": 3008 }, { "epoch": 0.4239520958083832, "grad_norm": 1.2639783490614072, "learning_rate": 1.2905576096548757e-05, "loss": 0.5725, "step": 3009 }, { "epoch": 0.424092990489609, "grad_norm": 1.0230959017583459, "learning_rate": 1.2901209064772224e-05, "loss": 0.9196, "step": 3010 }, { "epoch": 0.4242338851708348, "grad_norm": 1.2136810068950372, "learning_rate": 1.2896841428772984e-05, "loss": 0.5083, "step": 3011 }, { "epoch": 0.42437477985206057, "grad_norm": 1.160222018605727, "learning_rate": 1.289247318946067e-05, "loss": 0.4717, "step": 3012 }, { "epoch": 0.42451567453328637, "grad_norm": 0.8968110143168052, "learning_rate": 1.2888104347745038e-05, "loss": 0.8235, "step": 3013 }, { "epoch": 0.42465656921451217, "grad_norm": 0.9769614928778635, "learning_rate": 1.2883734904535962e-05, "loss": 0.816, "step": 3014 }, { "epoch": 0.4247974638957379, "grad_norm": 0.9111802723102114, "learning_rate": 1.2879364860743457e-05, "loss": 0.8507, "step": 3015 }, { "epoch": 0.4249383585769637, "grad_norm": 1.2621647975051158, "learning_rate": 1.2874994217277646e-05, "loss": 0.904, "step": 3016 }, { "epoch": 0.4250792532581895, "grad_norm": 1.3739492341805426, "learning_rate": 1.2870622975048787e-05, "loss": 0.5211, "step": 3017 }, { "epoch": 0.42522014793941526, "grad_norm": 1.063588982553805, "learning_rate": 1.2866251134967263e-05, "loss": 0.9077, "step": 3018 }, { "epoch": 0.42536104262064106, "grad_norm": 1.1087660878339116, "learning_rate": 1.2861878697943576e-05, "loss": 0.8775, "step": 3019 }, { "epoch": 0.42550193730186686, "grad_norm": 1.043024805141828, "learning_rate": 1.2857505664888357e-05, "loss": 0.9317, "step": 3020 }, { "epoch": 0.42564283198309266, "grad_norm": 0.9865247810407181, "learning_rate": 1.2853132036712358e-05, "loss": 0.9059, "step": 3021 }, { "epoch": 0.4257837266643184, "grad_norm": 1.2695134506714465, "learning_rate": 1.2848757814326456e-05, "loss": 0.4727, "step": 3022 }, { "epoch": 0.4259246213455442, "grad_norm": 0.9433772827278972, "learning_rate": 1.2844382998641654e-05, "loss": 0.828, "step": 3023 }, { "epoch": 0.42606551602677, "grad_norm": 1.073083261847865, "learning_rate": 1.2840007590569075e-05, "loss": 0.9077, "step": 3024 }, { "epoch": 0.42620641070799575, "grad_norm": 0.9308943612431019, "learning_rate": 1.2835631591019966e-05, "loss": 0.8269, "step": 3025 }, { "epoch": 0.42634730538922155, "grad_norm": 0.9252967798867002, "learning_rate": 1.28312550009057e-05, "loss": 0.8408, "step": 3026 }, { "epoch": 0.42648820007044735, "grad_norm": 1.1144891897465248, "learning_rate": 1.282687782113777e-05, "loss": 0.8744, "step": 3027 }, { "epoch": 0.4266290947516731, "grad_norm": 1.012337448622646, "learning_rate": 1.2822500052627798e-05, "loss": 0.9001, "step": 3028 }, { "epoch": 0.4267699894328989, "grad_norm": 0.9187469126868287, "learning_rate": 1.2818121696287516e-05, "loss": 0.8152, "step": 3029 }, { "epoch": 0.4269108841141247, "grad_norm": 1.2505670703834857, "learning_rate": 1.281374275302879e-05, "loss": 0.4985, "step": 3030 }, { "epoch": 0.4270517787953505, "grad_norm": 1.1936513790362873, "learning_rate": 1.2809363223763601e-05, "loss": 0.5732, "step": 3031 }, { "epoch": 0.42719267347657625, "grad_norm": 1.3371709213166056, "learning_rate": 1.2804983109404058e-05, "loss": 0.6046, "step": 3032 }, { "epoch": 0.42733356815780205, "grad_norm": 1.0175530722435207, "learning_rate": 1.2800602410862392e-05, "loss": 0.8367, "step": 3033 }, { "epoch": 0.42747446283902785, "grad_norm": 1.0441067304145346, "learning_rate": 1.2796221129050947e-05, "loss": 0.9206, "step": 3034 }, { "epoch": 0.4276153575202536, "grad_norm": 0.9633349444402002, "learning_rate": 1.2791839264882197e-05, "loss": 0.9068, "step": 3035 }, { "epoch": 0.4277562522014794, "grad_norm": 1.0890996301390625, "learning_rate": 1.2787456819268739e-05, "loss": 0.9147, "step": 3036 }, { "epoch": 0.4278971468827052, "grad_norm": 1.0985770174164204, "learning_rate": 1.2783073793123277e-05, "loss": 0.8223, "step": 3037 }, { "epoch": 0.42803804156393094, "grad_norm": 0.9340362631519723, "learning_rate": 1.2778690187358653e-05, "loss": 0.9177, "step": 3038 }, { "epoch": 0.42817893624515674, "grad_norm": 0.9690241178378988, "learning_rate": 1.2774306002887822e-05, "loss": 0.9337, "step": 3039 }, { "epoch": 0.42831983092638254, "grad_norm": 1.2563385360067376, "learning_rate": 1.2769921240623853e-05, "loss": 0.5361, "step": 3040 }, { "epoch": 0.42846072560760834, "grad_norm": 1.0129513681030766, "learning_rate": 1.2765535901479951e-05, "loss": 0.8985, "step": 3041 }, { "epoch": 0.4286016202888341, "grad_norm": 0.9748270283401226, "learning_rate": 1.2761149986369426e-05, "loss": 0.8978, "step": 3042 }, { "epoch": 0.4287425149700599, "grad_norm": 0.9736477595415942, "learning_rate": 1.2756763496205719e-05, "loss": 0.8721, "step": 3043 }, { "epoch": 0.4288834096512857, "grad_norm": 0.9872191782107608, "learning_rate": 1.275237643190238e-05, "loss": 0.8317, "step": 3044 }, { "epoch": 0.42902430433251143, "grad_norm": 1.1533938982862755, "learning_rate": 1.274798879437309e-05, "loss": 0.9432, "step": 3045 }, { "epoch": 0.42916519901373723, "grad_norm": 0.9103674160867868, "learning_rate": 1.2743600584531644e-05, "loss": 0.8458, "step": 3046 }, { "epoch": 0.42930609369496303, "grad_norm": 0.9260768971656702, "learning_rate": 1.2739211803291952e-05, "loss": 0.8876, "step": 3047 }, { "epoch": 0.4294469883761888, "grad_norm": 1.3824114254200963, "learning_rate": 1.2734822451568047e-05, "loss": 0.5125, "step": 3048 }, { "epoch": 0.4295878830574146, "grad_norm": 0.8599609133832239, "learning_rate": 1.2730432530274089e-05, "loss": 0.8907, "step": 3049 }, { "epoch": 0.4297287777386404, "grad_norm": 0.9422259556406036, "learning_rate": 1.2726042040324338e-05, "loss": 0.9134, "step": 3050 }, { "epoch": 0.4298696724198661, "grad_norm": 1.2656785660872827, "learning_rate": 1.2721650982633188e-05, "loss": 0.8484, "step": 3051 }, { "epoch": 0.4300105671010919, "grad_norm": 1.2953950335554147, "learning_rate": 1.2717259358115151e-05, "loss": 0.5241, "step": 3052 }, { "epoch": 0.4301514617823177, "grad_norm": 1.0988092562211058, "learning_rate": 1.2712867167684842e-05, "loss": 0.9525, "step": 3053 }, { "epoch": 0.4302923564635435, "grad_norm": 1.0310926480947815, "learning_rate": 1.2708474412257013e-05, "loss": 0.8963, "step": 3054 }, { "epoch": 0.43043325114476927, "grad_norm": 1.0136834458442472, "learning_rate": 1.270408109274652e-05, "loss": 0.9295, "step": 3055 }, { "epoch": 0.43057414582599507, "grad_norm": 1.2143057874390117, "learning_rate": 1.2699687210068343e-05, "loss": 0.5173, "step": 3056 }, { "epoch": 0.43071504050722087, "grad_norm": 0.9116098179943746, "learning_rate": 1.2695292765137577e-05, "loss": 0.8572, "step": 3057 }, { "epoch": 0.4308559351884466, "grad_norm": 1.0225985704378777, "learning_rate": 1.2690897758869436e-05, "loss": 0.9194, "step": 3058 }, { "epoch": 0.4309968298696724, "grad_norm": 1.2444642526541057, "learning_rate": 1.2686502192179248e-05, "loss": 0.5167, "step": 3059 }, { "epoch": 0.4311377245508982, "grad_norm": 1.0564454106497567, "learning_rate": 1.2682106065982462e-05, "loss": 0.8864, "step": 3060 }, { "epoch": 0.43127861923212396, "grad_norm": 0.9522814396116908, "learning_rate": 1.2677709381194637e-05, "loss": 0.8587, "step": 3061 }, { "epoch": 0.43141951391334976, "grad_norm": 1.2036120558167407, "learning_rate": 1.2673312138731455e-05, "loss": 0.4459, "step": 3062 }, { "epoch": 0.43156040859457556, "grad_norm": 1.0408543093501186, "learning_rate": 1.2668914339508708e-05, "loss": 0.8738, "step": 3063 }, { "epoch": 0.43170130327580136, "grad_norm": 1.0708428852314642, "learning_rate": 1.2664515984442313e-05, "loss": 0.887, "step": 3064 }, { "epoch": 0.4318421979570271, "grad_norm": 1.298907553100788, "learning_rate": 1.2660117074448293e-05, "loss": 0.6007, "step": 3065 }, { "epoch": 0.4319830926382529, "grad_norm": 1.0294244863605848, "learning_rate": 1.2655717610442789e-05, "loss": 0.9085, "step": 3066 }, { "epoch": 0.4321239873194787, "grad_norm": 1.0533477265266542, "learning_rate": 1.2651317593342065e-05, "loss": 0.8845, "step": 3067 }, { "epoch": 0.43226488200070445, "grad_norm": 1.0337399006954204, "learning_rate": 1.2646917024062489e-05, "loss": 0.9053, "step": 3068 }, { "epoch": 0.43240577668193025, "grad_norm": 0.9237141360184516, "learning_rate": 1.2642515903520552e-05, "loss": 0.9222, "step": 3069 }, { "epoch": 0.43254667136315605, "grad_norm": 1.3253325225410204, "learning_rate": 1.2638114232632852e-05, "loss": 0.5682, "step": 3070 }, { "epoch": 0.4326875660443818, "grad_norm": 0.9691801281438663, "learning_rate": 1.2633712012316113e-05, "loss": 0.8867, "step": 3071 }, { "epoch": 0.4328284607256076, "grad_norm": 1.0213722246843957, "learning_rate": 1.2629309243487167e-05, "loss": 0.9437, "step": 3072 }, { "epoch": 0.4329693554068334, "grad_norm": 0.9161015737816186, "learning_rate": 1.2624905927062953e-05, "loss": 0.9323, "step": 3073 }, { "epoch": 0.4331102500880592, "grad_norm": 0.9905815996682634, "learning_rate": 1.262050206396054e-05, "loss": 0.9087, "step": 3074 }, { "epoch": 0.43325114476928495, "grad_norm": 0.9158987847717913, "learning_rate": 1.2616097655097095e-05, "loss": 0.8882, "step": 3075 }, { "epoch": 0.43339203945051075, "grad_norm": 0.8887400330831209, "learning_rate": 1.2611692701389911e-05, "loss": 0.8418, "step": 3076 }, { "epoch": 0.43353293413173655, "grad_norm": 0.9703381267620517, "learning_rate": 1.260728720375639e-05, "loss": 0.8835, "step": 3077 }, { "epoch": 0.4336738288129623, "grad_norm": 1.443737033918286, "learning_rate": 1.260288116311404e-05, "loss": 0.5144, "step": 3078 }, { "epoch": 0.4338147234941881, "grad_norm": 1.0832660698681842, "learning_rate": 1.2598474580380493e-05, "loss": 0.8937, "step": 3079 }, { "epoch": 0.4339556181754139, "grad_norm": 1.3940173639128137, "learning_rate": 1.2594067456473494e-05, "loss": 0.5899, "step": 3080 }, { "epoch": 0.43409651285663964, "grad_norm": 0.9751904701894691, "learning_rate": 1.2589659792310887e-05, "loss": 0.9319, "step": 3081 }, { "epoch": 0.43423740753786544, "grad_norm": 0.9439138169881786, "learning_rate": 1.2585251588810646e-05, "loss": 0.848, "step": 3082 }, { "epoch": 0.43437830221909124, "grad_norm": 1.200535843495965, "learning_rate": 1.2580842846890842e-05, "loss": 0.4917, "step": 3083 }, { "epoch": 0.43451919690031704, "grad_norm": 0.9938287863734625, "learning_rate": 1.257643356746967e-05, "loss": 0.8709, "step": 3084 }, { "epoch": 0.4346600915815428, "grad_norm": 0.9207650593004197, "learning_rate": 1.2572023751465432e-05, "loss": 0.857, "step": 3085 }, { "epoch": 0.4348009862627686, "grad_norm": 0.9825450637548909, "learning_rate": 1.2567613399796541e-05, "loss": 0.8662, "step": 3086 }, { "epoch": 0.4349418809439944, "grad_norm": 1.1214230393282638, "learning_rate": 1.2563202513381525e-05, "loss": 0.8566, "step": 3087 }, { "epoch": 0.43508277562522013, "grad_norm": 0.9940060207129533, "learning_rate": 1.2558791093139016e-05, "loss": 0.8471, "step": 3088 }, { "epoch": 0.43522367030644593, "grad_norm": 1.1270171271854041, "learning_rate": 1.2554379139987765e-05, "loss": 0.5753, "step": 3089 }, { "epoch": 0.43536456498767173, "grad_norm": 0.95727986499155, "learning_rate": 1.2549966654846632e-05, "loss": 0.8722, "step": 3090 }, { "epoch": 0.4355054596688975, "grad_norm": 1.2977302546445104, "learning_rate": 1.2545553638634584e-05, "loss": 0.5965, "step": 3091 }, { "epoch": 0.4356463543501233, "grad_norm": 1.2235255104921097, "learning_rate": 1.2541140092270703e-05, "loss": 0.455, "step": 3092 }, { "epoch": 0.4357872490313491, "grad_norm": 1.0483625800639493, "learning_rate": 1.2536726016674182e-05, "loss": 0.9415, "step": 3093 }, { "epoch": 0.4359281437125748, "grad_norm": 0.976740142402816, "learning_rate": 1.2532311412764317e-05, "loss": 0.8773, "step": 3094 }, { "epoch": 0.4360690383938006, "grad_norm": 1.0151692895339177, "learning_rate": 1.2527896281460525e-05, "loss": 0.8766, "step": 3095 }, { "epoch": 0.4362099330750264, "grad_norm": 0.9824424842142685, "learning_rate": 1.2523480623682322e-05, "loss": 0.8908, "step": 3096 }, { "epoch": 0.4363508277562522, "grad_norm": 0.965713396560921, "learning_rate": 1.2519064440349343e-05, "loss": 0.9155, "step": 3097 }, { "epoch": 0.43649172243747797, "grad_norm": 1.157345679626157, "learning_rate": 1.2514647732381324e-05, "loss": 0.4731, "step": 3098 }, { "epoch": 0.43663261711870377, "grad_norm": 0.9482640250013389, "learning_rate": 1.2510230500698115e-05, "loss": 0.9055, "step": 3099 }, { "epoch": 0.43677351179992957, "grad_norm": 1.3331238149622857, "learning_rate": 1.2505812746219678e-05, "loss": 0.8532, "step": 3100 }, { "epoch": 0.4369144064811553, "grad_norm": 0.9973953641622236, "learning_rate": 1.2501394469866076e-05, "loss": 0.9178, "step": 3101 }, { "epoch": 0.4370553011623811, "grad_norm": 1.0824930196171163, "learning_rate": 1.249697567255749e-05, "loss": 0.8962, "step": 3102 }, { "epoch": 0.4371961958436069, "grad_norm": 1.5741051960143533, "learning_rate": 1.2492556355214201e-05, "loss": 0.6401, "step": 3103 }, { "epoch": 0.43733709052483266, "grad_norm": 1.074160481891561, "learning_rate": 1.2488136518756599e-05, "loss": 0.8162, "step": 3104 }, { "epoch": 0.43747798520605846, "grad_norm": 1.1067759771351053, "learning_rate": 1.2483716164105189e-05, "loss": 0.941, "step": 3105 }, { "epoch": 0.43761887988728426, "grad_norm": 1.1229263477956406, "learning_rate": 1.2479295292180582e-05, "loss": 0.9132, "step": 3106 }, { "epoch": 0.43775977456851006, "grad_norm": 1.0563276244464088, "learning_rate": 1.2474873903903489e-05, "loss": 0.9209, "step": 3107 }, { "epoch": 0.4379006692497358, "grad_norm": 1.322081928833251, "learning_rate": 1.2470452000194737e-05, "loss": 0.5051, "step": 3108 }, { "epoch": 0.4380415639309616, "grad_norm": 1.3733275006683734, "learning_rate": 1.2466029581975254e-05, "loss": 0.4857, "step": 3109 }, { "epoch": 0.4381824586121874, "grad_norm": 1.2267841725109887, "learning_rate": 1.2461606650166082e-05, "loss": 0.5069, "step": 3110 }, { "epoch": 0.43832335329341315, "grad_norm": 1.1242603439280305, "learning_rate": 1.245718320568837e-05, "loss": 0.9849, "step": 3111 }, { "epoch": 0.43846424797463895, "grad_norm": 1.2884475141345233, "learning_rate": 1.2452759249463365e-05, "loss": 0.4933, "step": 3112 }, { "epoch": 0.43860514265586475, "grad_norm": 1.2535612143205777, "learning_rate": 1.2448334782412428e-05, "loss": 0.5936, "step": 3113 }, { "epoch": 0.4387460373370905, "grad_norm": 0.9918168000775192, "learning_rate": 1.244390980545702e-05, "loss": 0.8634, "step": 3114 }, { "epoch": 0.4388869320183163, "grad_norm": 1.3496847293345269, "learning_rate": 1.2439484319518718e-05, "loss": 0.4812, "step": 3115 }, { "epoch": 0.4390278266995421, "grad_norm": 1.111271317930624, "learning_rate": 1.2435058325519197e-05, "loss": 0.8519, "step": 3116 }, { "epoch": 0.4391687213807679, "grad_norm": 1.4428717726167246, "learning_rate": 1.2430631824380241e-05, "loss": 0.5656, "step": 3117 }, { "epoch": 0.43930961606199365, "grad_norm": 1.294565811951307, "learning_rate": 1.2426204817023741e-05, "loss": 0.5468, "step": 3118 }, { "epoch": 0.43945051074321945, "grad_norm": 1.0098202642423761, "learning_rate": 1.2421777304371687e-05, "loss": 0.887, "step": 3119 }, { "epoch": 0.43959140542444525, "grad_norm": 0.9623704008970133, "learning_rate": 1.241734928734618e-05, "loss": 0.8766, "step": 3120 }, { "epoch": 0.439732300105671, "grad_norm": 1.3133441577833365, "learning_rate": 1.2412920766869428e-05, "loss": 0.5143, "step": 3121 }, { "epoch": 0.4398731947868968, "grad_norm": 1.3527557341607186, "learning_rate": 1.2408491743863735e-05, "loss": 0.5527, "step": 3122 }, { "epoch": 0.4400140894681226, "grad_norm": 0.923661148367608, "learning_rate": 1.240406221925152e-05, "loss": 0.892, "step": 3123 }, { "epoch": 0.44015498414934834, "grad_norm": 1.219473857165689, "learning_rate": 1.2399632193955298e-05, "loss": 0.4968, "step": 3124 }, { "epoch": 0.44029587883057414, "grad_norm": 0.9171264663114467, "learning_rate": 1.2395201668897692e-05, "loss": 0.8376, "step": 3125 }, { "epoch": 0.44043677351179994, "grad_norm": 1.229328366175274, "learning_rate": 1.2390770645001433e-05, "loss": 0.4699, "step": 3126 }, { "epoch": 0.44057766819302574, "grad_norm": 1.1773265100634018, "learning_rate": 1.2386339123189348e-05, "loss": 0.4803, "step": 3127 }, { "epoch": 0.4407185628742515, "grad_norm": 1.0590607457211667, "learning_rate": 1.2381907104384375e-05, "loss": 0.8686, "step": 3128 }, { "epoch": 0.4408594575554773, "grad_norm": 0.8924255482096312, "learning_rate": 1.2377474589509546e-05, "loss": 0.8737, "step": 3129 }, { "epoch": 0.4410003522367031, "grad_norm": 1.310472970401362, "learning_rate": 1.2373041579488008e-05, "loss": 0.5898, "step": 3130 }, { "epoch": 0.44114124691792883, "grad_norm": 1.2197928154636821, "learning_rate": 1.2368608075243006e-05, "loss": 0.6643, "step": 3131 }, { "epoch": 0.44128214159915463, "grad_norm": 1.0207819285714415, "learning_rate": 1.2364174077697883e-05, "loss": 0.9077, "step": 3132 }, { "epoch": 0.44142303628038043, "grad_norm": 1.1389209022272033, "learning_rate": 1.2359739587776092e-05, "loss": 0.833, "step": 3133 }, { "epoch": 0.4415639309616062, "grad_norm": 0.9835469540879498, "learning_rate": 1.2355304606401185e-05, "loss": 0.9214, "step": 3134 }, { "epoch": 0.441704825642832, "grad_norm": 1.0928838825381977, "learning_rate": 1.2350869134496815e-05, "loss": 0.8788, "step": 3135 }, { "epoch": 0.4418457203240578, "grad_norm": 1.3225641333137559, "learning_rate": 1.2346433172986743e-05, "loss": 0.5679, "step": 3136 }, { "epoch": 0.4419866150052835, "grad_norm": 0.9131597846336612, "learning_rate": 1.2341996722794827e-05, "loss": 0.8901, "step": 3137 }, { "epoch": 0.4421275096865093, "grad_norm": 0.8589767523597706, "learning_rate": 1.2337559784845026e-05, "loss": 0.8537, "step": 3138 }, { "epoch": 0.4422684043677351, "grad_norm": 1.0296005869539326, "learning_rate": 1.2333122360061404e-05, "loss": 0.8827, "step": 3139 }, { "epoch": 0.4424092990489609, "grad_norm": 1.0281296642099647, "learning_rate": 1.2328684449368126e-05, "loss": 0.8384, "step": 3140 }, { "epoch": 0.44255019373018667, "grad_norm": 0.8727960146797711, "learning_rate": 1.2324246053689455e-05, "loss": 0.8445, "step": 3141 }, { "epoch": 0.44269108841141247, "grad_norm": 1.3832097709929463, "learning_rate": 1.231980717394976e-05, "loss": 0.4754, "step": 3142 }, { "epoch": 0.44283198309263827, "grad_norm": 0.9623261002180606, "learning_rate": 1.2315367811073504e-05, "loss": 0.8819, "step": 3143 }, { "epoch": 0.442972877773864, "grad_norm": 1.333499722100439, "learning_rate": 1.2310927965985258e-05, "loss": 0.5146, "step": 3144 }, { "epoch": 0.4431137724550898, "grad_norm": 1.2603534608833038, "learning_rate": 1.2306487639609687e-05, "loss": 0.5592, "step": 3145 }, { "epoch": 0.4432546671363156, "grad_norm": 1.4326556500978342, "learning_rate": 1.2302046832871564e-05, "loss": 0.5317, "step": 3146 }, { "epoch": 0.44339556181754136, "grad_norm": 1.0268812798816864, "learning_rate": 1.2297605546695753e-05, "loss": 0.8918, "step": 3147 }, { "epoch": 0.44353645649876716, "grad_norm": 0.916043785345858, "learning_rate": 1.2293163782007222e-05, "loss": 0.8513, "step": 3148 }, { "epoch": 0.44367735117999296, "grad_norm": 1.0206409127686449, "learning_rate": 1.2288721539731043e-05, "loss": 0.8663, "step": 3149 }, { "epoch": 0.44381824586121876, "grad_norm": 1.0172724619785825, "learning_rate": 1.228427882079238e-05, "loss": 0.8694, "step": 3150 }, { "epoch": 0.4439591405424445, "grad_norm": 1.225147720758445, "learning_rate": 1.2279835626116497e-05, "loss": 0.4305, "step": 3151 }, { "epoch": 0.4441000352236703, "grad_norm": 1.0155085281575178, "learning_rate": 1.2275391956628766e-05, "loss": 0.8621, "step": 3152 }, { "epoch": 0.4442409299048961, "grad_norm": 1.0562753819853374, "learning_rate": 1.2270947813254645e-05, "loss": 0.8777, "step": 3153 }, { "epoch": 0.44438182458612185, "grad_norm": 1.4607792124066725, "learning_rate": 1.2266503196919704e-05, "loss": 0.5487, "step": 3154 }, { "epoch": 0.44452271926734765, "grad_norm": 1.020148009212996, "learning_rate": 1.2262058108549597e-05, "loss": 0.9414, "step": 3155 }, { "epoch": 0.44466361394857346, "grad_norm": 0.9679129316464493, "learning_rate": 1.2257612549070091e-05, "loss": 0.867, "step": 3156 }, { "epoch": 0.4448045086297992, "grad_norm": 1.022601178300213, "learning_rate": 1.2253166519407041e-05, "loss": 0.9204, "step": 3157 }, { "epoch": 0.444945403311025, "grad_norm": 1.2663649126956644, "learning_rate": 1.22487200204864e-05, "loss": 0.4654, "step": 3158 }, { "epoch": 0.4450862979922508, "grad_norm": 0.9238784516787887, "learning_rate": 1.224427305323423e-05, "loss": 0.8429, "step": 3159 }, { "epoch": 0.4452271926734766, "grad_norm": 0.9537394774066349, "learning_rate": 1.2239825618576673e-05, "loss": 0.8437, "step": 3160 }, { "epoch": 0.44536808735470235, "grad_norm": 0.9518156656131419, "learning_rate": 1.2235377717439983e-05, "loss": 0.842, "step": 3161 }, { "epoch": 0.44550898203592815, "grad_norm": 1.077633168938523, "learning_rate": 1.2230929350750507e-05, "loss": 0.8621, "step": 3162 }, { "epoch": 0.44564987671715395, "grad_norm": 1.5160525366253221, "learning_rate": 1.2226480519434682e-05, "loss": 0.5366, "step": 3163 }, { "epoch": 0.4457907713983797, "grad_norm": 0.9819810076936254, "learning_rate": 1.2222031224419052e-05, "loss": 0.8403, "step": 3164 }, { "epoch": 0.4459316660796055, "grad_norm": 1.010519032790527, "learning_rate": 1.2217581466630255e-05, "loss": 0.8374, "step": 3165 }, { "epoch": 0.4460725607608313, "grad_norm": 1.0095029092464887, "learning_rate": 1.2213131246995017e-05, "loss": 0.8598, "step": 3166 }, { "epoch": 0.44621345544205704, "grad_norm": 1.0996172694134805, "learning_rate": 1.2208680566440174e-05, "loss": 0.8871, "step": 3167 }, { "epoch": 0.44635435012328284, "grad_norm": 1.321746556804889, "learning_rate": 1.2204229425892643e-05, "loss": 0.5667, "step": 3168 }, { "epoch": 0.44649524480450864, "grad_norm": 0.9786798357205514, "learning_rate": 1.219977782627945e-05, "loss": 0.8889, "step": 3169 }, { "epoch": 0.44663613948573444, "grad_norm": 1.1653708254497988, "learning_rate": 1.2195325768527711e-05, "loss": 0.9012, "step": 3170 }, { "epoch": 0.4467770341669602, "grad_norm": 1.1210743616282401, "learning_rate": 1.2190873253564634e-05, "loss": 0.8825, "step": 3171 }, { "epoch": 0.446917928848186, "grad_norm": 0.9501500335174906, "learning_rate": 1.218642028231753e-05, "loss": 0.835, "step": 3172 }, { "epoch": 0.4470588235294118, "grad_norm": 0.9228790136286656, "learning_rate": 1.2181966855713798e-05, "loss": 0.8437, "step": 3173 }, { "epoch": 0.44719971821063753, "grad_norm": 0.9762624611792383, "learning_rate": 1.2177512974680932e-05, "loss": 0.8805, "step": 3174 }, { "epoch": 0.44734061289186333, "grad_norm": 1.1438055419053792, "learning_rate": 1.217305864014653e-05, "loss": 0.431, "step": 3175 }, { "epoch": 0.44748150757308913, "grad_norm": 1.2854331435770663, "learning_rate": 1.2168603853038272e-05, "loss": 0.5058, "step": 3176 }, { "epoch": 0.4476224022543149, "grad_norm": 1.0964384348971778, "learning_rate": 1.2164148614283937e-05, "loss": 0.9369, "step": 3177 }, { "epoch": 0.4477632969355407, "grad_norm": 1.3364208132820783, "learning_rate": 1.2159692924811407e-05, "loss": 0.6271, "step": 3178 }, { "epoch": 0.4479041916167665, "grad_norm": 0.9948593981497079, "learning_rate": 1.215523678554864e-05, "loss": 0.9106, "step": 3179 }, { "epoch": 0.4480450862979922, "grad_norm": 0.93535515889377, "learning_rate": 1.2150780197423706e-05, "loss": 0.879, "step": 3180 }, { "epoch": 0.448185980979218, "grad_norm": 1.0143586581521347, "learning_rate": 1.2146323161364754e-05, "loss": 0.8522, "step": 3181 }, { "epoch": 0.4483268756604438, "grad_norm": 1.0354603870875059, "learning_rate": 1.2141865678300036e-05, "loss": 0.8963, "step": 3182 }, { "epoch": 0.4484677703416696, "grad_norm": 0.9497604020404018, "learning_rate": 1.213740774915789e-05, "loss": 0.9199, "step": 3183 }, { "epoch": 0.44860866502289537, "grad_norm": 1.3305760009748204, "learning_rate": 1.2132949374866754e-05, "loss": 0.562, "step": 3184 }, { "epoch": 0.44874955970412117, "grad_norm": 1.0604648207524916, "learning_rate": 1.2128490556355152e-05, "loss": 0.8846, "step": 3185 }, { "epoch": 0.44889045438534697, "grad_norm": 1.0918139328628707, "learning_rate": 1.2124031294551704e-05, "loss": 0.9258, "step": 3186 }, { "epoch": 0.4490313490665727, "grad_norm": 1.2504975809289176, "learning_rate": 1.2119571590385124e-05, "loss": 0.5187, "step": 3187 }, { "epoch": 0.4491722437477985, "grad_norm": 1.004240807473542, "learning_rate": 1.2115111444784216e-05, "loss": 0.8499, "step": 3188 }, { "epoch": 0.4493131384290243, "grad_norm": 1.1058903531815407, "learning_rate": 1.211065085867787e-05, "loss": 0.9443, "step": 3189 }, { "epoch": 0.44945403311025006, "grad_norm": 0.9538728994650747, "learning_rate": 1.210618983299508e-05, "loss": 0.8577, "step": 3190 }, { "epoch": 0.44959492779147586, "grad_norm": 0.9506470899747922, "learning_rate": 1.2101728368664925e-05, "loss": 0.8738, "step": 3191 }, { "epoch": 0.44973582247270166, "grad_norm": 0.9195912131070001, "learning_rate": 1.209726646661657e-05, "loss": 0.888, "step": 3192 }, { "epoch": 0.44987671715392746, "grad_norm": 1.0411400289817176, "learning_rate": 1.2092804127779285e-05, "loss": 0.9414, "step": 3193 }, { "epoch": 0.4500176118351532, "grad_norm": 0.9808191132911614, "learning_rate": 1.2088341353082416e-05, "loss": 0.8741, "step": 3194 }, { "epoch": 0.450158506516379, "grad_norm": 1.047104282859704, "learning_rate": 1.208387814345541e-05, "loss": 0.825, "step": 3195 }, { "epoch": 0.4502994011976048, "grad_norm": 1.1435946542869186, "learning_rate": 1.20794144998278e-05, "loss": 0.895, "step": 3196 }, { "epoch": 0.45044029587883055, "grad_norm": 0.9701824249449641, "learning_rate": 1.2074950423129205e-05, "loss": 0.9218, "step": 3197 }, { "epoch": 0.45058119056005635, "grad_norm": 1.0025274613380433, "learning_rate": 1.2070485914289352e-05, "loss": 0.9149, "step": 3198 }, { "epoch": 0.45072208524128216, "grad_norm": 0.989290885749201, "learning_rate": 1.2066020974238035e-05, "loss": 0.7847, "step": 3199 }, { "epoch": 0.4508629799225079, "grad_norm": 1.007939141426468, "learning_rate": 1.206155560390515e-05, "loss": 0.8759, "step": 3200 }, { "epoch": 0.4510038746037337, "grad_norm": 1.214071568474075, "learning_rate": 1.2057089804220687e-05, "loss": 0.493, "step": 3201 }, { "epoch": 0.4511447692849595, "grad_norm": 0.9989695160792114, "learning_rate": 1.2052623576114712e-05, "loss": 0.886, "step": 3202 }, { "epoch": 0.4512856639661853, "grad_norm": 1.197799598166202, "learning_rate": 1.2048156920517396e-05, "loss": 0.4997, "step": 3203 }, { "epoch": 0.45142655864741105, "grad_norm": 0.9818044212251141, "learning_rate": 1.204368983835898e-05, "loss": 0.9125, "step": 3204 }, { "epoch": 0.45156745332863685, "grad_norm": 1.1916600119892884, "learning_rate": 1.203922233056981e-05, "loss": 0.5074, "step": 3205 }, { "epoch": 0.45170834800986265, "grad_norm": 1.3433531672945502, "learning_rate": 1.2034754398080321e-05, "loss": 0.5575, "step": 3206 }, { "epoch": 0.4518492426910884, "grad_norm": 0.9844943810618993, "learning_rate": 1.2030286041821023e-05, "loss": 0.9355, "step": 3207 }, { "epoch": 0.4519901373723142, "grad_norm": 0.973363707639663, "learning_rate": 1.2025817262722523e-05, "loss": 0.9118, "step": 3208 }, { "epoch": 0.45213103205354, "grad_norm": 1.1677528062750102, "learning_rate": 1.2021348061715516e-05, "loss": 0.8646, "step": 3209 }, { "epoch": 0.45227192673476574, "grad_norm": 1.0234466436567105, "learning_rate": 1.2016878439730783e-05, "loss": 0.876, "step": 3210 }, { "epoch": 0.45241282141599154, "grad_norm": 1.118463862520955, "learning_rate": 1.2012408397699198e-05, "loss": 0.5195, "step": 3211 }, { "epoch": 0.45255371609721734, "grad_norm": 0.8826698172669915, "learning_rate": 1.2007937936551711e-05, "loss": 0.8084, "step": 3212 }, { "epoch": 0.45269461077844314, "grad_norm": 1.291038327168555, "learning_rate": 1.2003467057219372e-05, "loss": 0.4765, "step": 3213 }, { "epoch": 0.4528355054596689, "grad_norm": 1.4138302870035588, "learning_rate": 1.199899576063331e-05, "loss": 0.61, "step": 3214 }, { "epoch": 0.4529764001408947, "grad_norm": 1.0943989741089521, "learning_rate": 1.1994524047724742e-05, "loss": 0.9111, "step": 3215 }, { "epoch": 0.4531172948221205, "grad_norm": 0.9614756870091343, "learning_rate": 1.1990051919424977e-05, "loss": 0.8701, "step": 3216 }, { "epoch": 0.45325818950334623, "grad_norm": 1.1497045357063456, "learning_rate": 1.1985579376665401e-05, "loss": 0.9254, "step": 3217 }, { "epoch": 0.45339908418457203, "grad_norm": 1.3076068538419403, "learning_rate": 1.1981106420377496e-05, "loss": 0.6138, "step": 3218 }, { "epoch": 0.45353997886579783, "grad_norm": 0.9892305047363172, "learning_rate": 1.1976633051492824e-05, "loss": 0.9073, "step": 3219 }, { "epoch": 0.4536808735470236, "grad_norm": 1.1657240574983314, "learning_rate": 1.1972159270943034e-05, "loss": 0.974, "step": 3220 }, { "epoch": 0.4538217682282494, "grad_norm": 1.4164225028771176, "learning_rate": 1.1967685079659864e-05, "loss": 0.5876, "step": 3221 }, { "epoch": 0.4539626629094752, "grad_norm": 1.1068279020494276, "learning_rate": 1.1963210478575136e-05, "loss": 0.5136, "step": 3222 }, { "epoch": 0.4541035575907009, "grad_norm": 0.9375559934279347, "learning_rate": 1.1958735468620753e-05, "loss": 0.8418, "step": 3223 }, { "epoch": 0.4542444522719267, "grad_norm": 1.6835844753989082, "learning_rate": 1.195426005072871e-05, "loss": 0.4951, "step": 3224 }, { "epoch": 0.4543853469531525, "grad_norm": 0.9610924104575719, "learning_rate": 1.1949784225831081e-05, "loss": 0.8169, "step": 3225 }, { "epoch": 0.4545262416343783, "grad_norm": 1.1138913585202845, "learning_rate": 1.1945307994860027e-05, "loss": 0.9497, "step": 3226 }, { "epoch": 0.45466713631560407, "grad_norm": 1.2786441022018056, "learning_rate": 1.1940831358747797e-05, "loss": 0.5566, "step": 3227 }, { "epoch": 0.45480803099682987, "grad_norm": 0.9495815160959392, "learning_rate": 1.193635431842672e-05, "loss": 0.877, "step": 3228 }, { "epoch": 0.45494892567805567, "grad_norm": 0.8083151396836649, "learning_rate": 1.1931876874829209e-05, "loss": 0.816, "step": 3229 }, { "epoch": 0.4550898203592814, "grad_norm": 1.0045966860003195, "learning_rate": 1.1927399028887763e-05, "loss": 0.8437, "step": 3230 }, { "epoch": 0.4552307150405072, "grad_norm": 0.9635095643246544, "learning_rate": 1.1922920781534966e-05, "loss": 0.925, "step": 3231 }, { "epoch": 0.455371609721733, "grad_norm": 0.9347666443310744, "learning_rate": 1.1918442133703482e-05, "loss": 0.8628, "step": 3232 }, { "epoch": 0.45551250440295876, "grad_norm": 1.0141112836325787, "learning_rate": 1.1913963086326061e-05, "loss": 0.8997, "step": 3233 }, { "epoch": 0.45565339908418456, "grad_norm": 1.068520803888324, "learning_rate": 1.1909483640335538e-05, "loss": 0.8914, "step": 3234 }, { "epoch": 0.45579429376541036, "grad_norm": 0.95274385387791, "learning_rate": 1.1905003796664824e-05, "loss": 0.8955, "step": 3235 }, { "epoch": 0.45593518844663616, "grad_norm": 1.1394899636106117, "learning_rate": 1.190052355624692e-05, "loss": 0.869, "step": 3236 }, { "epoch": 0.4560760831278619, "grad_norm": 1.3829763594085789, "learning_rate": 1.1896042920014908e-05, "loss": 0.639, "step": 3237 }, { "epoch": 0.4562169778090877, "grad_norm": 1.2777770974593095, "learning_rate": 1.1891561888901947e-05, "loss": 0.4639, "step": 3238 }, { "epoch": 0.4563578724903135, "grad_norm": 1.0453934487695746, "learning_rate": 1.1887080463841287e-05, "loss": 0.9059, "step": 3239 }, { "epoch": 0.45649876717153925, "grad_norm": 1.3737906267349747, "learning_rate": 1.1882598645766252e-05, "loss": 0.5424, "step": 3240 }, { "epoch": 0.45663966185276506, "grad_norm": 0.86374620017915, "learning_rate": 1.1878116435610257e-05, "loss": 0.8912, "step": 3241 }, { "epoch": 0.45678055653399086, "grad_norm": 0.9633641739340206, "learning_rate": 1.1873633834306788e-05, "loss": 0.8875, "step": 3242 }, { "epoch": 0.4569214512152166, "grad_norm": 1.386171090121974, "learning_rate": 1.1869150842789419e-05, "loss": 0.4991, "step": 3243 }, { "epoch": 0.4570623458964424, "grad_norm": 1.0983495895914952, "learning_rate": 1.1864667461991808e-05, "loss": 0.9191, "step": 3244 }, { "epoch": 0.4572032405776682, "grad_norm": 1.305248927182144, "learning_rate": 1.1860183692847683e-05, "loss": 0.5022, "step": 3245 }, { "epoch": 0.457344135258894, "grad_norm": 0.953074805583981, "learning_rate": 1.1855699536290863e-05, "loss": 0.8409, "step": 3246 }, { "epoch": 0.45748502994011975, "grad_norm": 1.2640851259608492, "learning_rate": 1.185121499325525e-05, "loss": 0.5351, "step": 3247 }, { "epoch": 0.45762592462134555, "grad_norm": 1.3697766165852867, "learning_rate": 1.184673006467481e-05, "loss": 0.4907, "step": 3248 }, { "epoch": 0.45776681930257135, "grad_norm": 1.266859408881116, "learning_rate": 1.184224475148361e-05, "loss": 0.6378, "step": 3249 }, { "epoch": 0.4579077139837971, "grad_norm": 0.9918006987703556, "learning_rate": 1.1837759054615783e-05, "loss": 0.8883, "step": 3250 }, { "epoch": 0.4580486086650229, "grad_norm": 1.0453352732561034, "learning_rate": 1.1833272975005545e-05, "loss": 0.9193, "step": 3251 }, { "epoch": 0.4581895033462487, "grad_norm": 0.9630295859527329, "learning_rate": 1.18287865135872e-05, "loss": 0.8804, "step": 3252 }, { "epoch": 0.45833039802747444, "grad_norm": 1.0084497690577179, "learning_rate": 1.1824299671295117e-05, "loss": 0.9126, "step": 3253 }, { "epoch": 0.45847129270870024, "grad_norm": 0.951656635872459, "learning_rate": 1.1819812449063754e-05, "loss": 0.8674, "step": 3254 }, { "epoch": 0.45861218738992604, "grad_norm": 1.3658508600395585, "learning_rate": 1.181532484782765e-05, "loss": 0.5038, "step": 3255 }, { "epoch": 0.45875308207115184, "grad_norm": 0.8581600039182092, "learning_rate": 1.1810836868521413e-05, "loss": 0.8719, "step": 3256 }, { "epoch": 0.4588939767523776, "grad_norm": 0.9379447740048746, "learning_rate": 1.1806348512079742e-05, "loss": 0.8599, "step": 3257 }, { "epoch": 0.4590348714336034, "grad_norm": 1.1169518804338152, "learning_rate": 1.1801859779437403e-05, "loss": 0.9298, "step": 3258 }, { "epoch": 0.4591757661148292, "grad_norm": 0.934245376407298, "learning_rate": 1.1797370671529246e-05, "loss": 0.8839, "step": 3259 }, { "epoch": 0.45931666079605493, "grad_norm": 0.9734107236160428, "learning_rate": 1.1792881189290202e-05, "loss": 0.8532, "step": 3260 }, { "epoch": 0.45945755547728073, "grad_norm": 0.9367053452995847, "learning_rate": 1.1788391333655274e-05, "loss": 0.8897, "step": 3261 }, { "epoch": 0.45959845015850653, "grad_norm": 0.9264426709718514, "learning_rate": 1.1783901105559545e-05, "loss": 0.822, "step": 3262 }, { "epoch": 0.4597393448397323, "grad_norm": 1.2645907421763911, "learning_rate": 1.177941050593818e-05, "loss": 0.5654, "step": 3263 }, { "epoch": 0.4598802395209581, "grad_norm": 0.9396697057996976, "learning_rate": 1.1774919535726415e-05, "loss": 0.8855, "step": 3264 }, { "epoch": 0.4600211342021839, "grad_norm": 1.096522842472502, "learning_rate": 1.1770428195859567e-05, "loss": 0.8911, "step": 3265 }, { "epoch": 0.4601620288834096, "grad_norm": 1.2619343993021308, "learning_rate": 1.1765936487273022e-05, "loss": 0.5457, "step": 3266 }, { "epoch": 0.4603029235646354, "grad_norm": 1.0864168461250339, "learning_rate": 1.1761444410902258e-05, "loss": 0.8523, "step": 3267 }, { "epoch": 0.4604438182458612, "grad_norm": 0.9638783049670551, "learning_rate": 1.175695196768282e-05, "loss": 0.8671, "step": 3268 }, { "epoch": 0.460584712927087, "grad_norm": 1.5649284067926021, "learning_rate": 1.1752459158550322e-05, "loss": 0.598, "step": 3269 }, { "epoch": 0.46072560760831277, "grad_norm": 1.3042585849425106, "learning_rate": 1.1747965984440474e-05, "loss": 0.5299, "step": 3270 }, { "epoch": 0.46086650228953857, "grad_norm": 1.2224304404084172, "learning_rate": 1.1743472446289041e-05, "loss": 0.4859, "step": 3271 }, { "epoch": 0.46100739697076437, "grad_norm": 0.9849275930371121, "learning_rate": 1.173897854503188e-05, "loss": 0.9012, "step": 3272 }, { "epoch": 0.4611482916519901, "grad_norm": 1.2900071563028992, "learning_rate": 1.1734484281604916e-05, "loss": 0.5813, "step": 3273 }, { "epoch": 0.4612891863332159, "grad_norm": 1.0122289186038387, "learning_rate": 1.1729989656944147e-05, "loss": 0.8917, "step": 3274 }, { "epoch": 0.4614300810144417, "grad_norm": 0.9487594176207168, "learning_rate": 1.172549467198565e-05, "loss": 0.8445, "step": 3275 }, { "epoch": 0.46157097569566746, "grad_norm": 0.9722704957626734, "learning_rate": 1.1720999327665586e-05, "loss": 0.8646, "step": 3276 }, { "epoch": 0.46171187037689326, "grad_norm": 0.9860739771535157, "learning_rate": 1.1716503624920167e-05, "loss": 0.8709, "step": 3277 }, { "epoch": 0.46185276505811906, "grad_norm": 1.2677474968217475, "learning_rate": 1.1712007564685705e-05, "loss": 0.4993, "step": 3278 }, { "epoch": 0.46199365973934486, "grad_norm": 1.0568082520769742, "learning_rate": 1.170751114789857e-05, "loss": 0.9505, "step": 3279 }, { "epoch": 0.4621345544205706, "grad_norm": 0.9897932854939546, "learning_rate": 1.1703014375495216e-05, "loss": 0.8185, "step": 3280 }, { "epoch": 0.4622754491017964, "grad_norm": 0.9785176033822439, "learning_rate": 1.1698517248412166e-05, "loss": 0.914, "step": 3281 }, { "epoch": 0.4624163437830222, "grad_norm": 1.0319153254213356, "learning_rate": 1.1694019767586016e-05, "loss": 0.8798, "step": 3282 }, { "epoch": 0.46255723846424796, "grad_norm": 1.0176170198070598, "learning_rate": 1.168952193395344e-05, "loss": 0.891, "step": 3283 }, { "epoch": 0.46269813314547376, "grad_norm": 1.2438950086147345, "learning_rate": 1.1685023748451179e-05, "loss": 0.5471, "step": 3284 }, { "epoch": 0.46283902782669956, "grad_norm": 1.2071862735050993, "learning_rate": 1.1680525212016054e-05, "loss": 0.4606, "step": 3285 }, { "epoch": 0.4629799225079253, "grad_norm": 0.9537222988453503, "learning_rate": 1.167602632558496e-05, "loss": 0.8607, "step": 3286 }, { "epoch": 0.4631208171891511, "grad_norm": 1.0186440252844156, "learning_rate": 1.1671527090094856e-05, "loss": 0.8873, "step": 3287 }, { "epoch": 0.4632617118703769, "grad_norm": 1.0753012770032904, "learning_rate": 1.1667027506482782e-05, "loss": 0.888, "step": 3288 }, { "epoch": 0.4634026065516027, "grad_norm": 1.307666789826866, "learning_rate": 1.1662527575685842e-05, "loss": 0.5991, "step": 3289 }, { "epoch": 0.46354350123282845, "grad_norm": 0.900520856885177, "learning_rate": 1.1658027298641227e-05, "loss": 0.9254, "step": 3290 }, { "epoch": 0.46368439591405425, "grad_norm": 0.9359357639583408, "learning_rate": 1.1653526676286185e-05, "loss": 0.8697, "step": 3291 }, { "epoch": 0.46382529059528005, "grad_norm": 1.0646161835980827, "learning_rate": 1.164902570955804e-05, "loss": 0.9025, "step": 3292 }, { "epoch": 0.4639661852765058, "grad_norm": 1.1223474085689769, "learning_rate": 1.1644524399394194e-05, "loss": 0.8694, "step": 3293 }, { "epoch": 0.4641070799577316, "grad_norm": 0.8753665685552722, "learning_rate": 1.1640022746732114e-05, "loss": 0.8339, "step": 3294 }, { "epoch": 0.4642479746389574, "grad_norm": 0.9819235863491661, "learning_rate": 1.1635520752509341e-05, "loss": 0.8788, "step": 3295 }, { "epoch": 0.46438886932018314, "grad_norm": 1.3521148117447306, "learning_rate": 1.1631018417663488e-05, "loss": 0.4945, "step": 3296 }, { "epoch": 0.46452976400140894, "grad_norm": 1.2441813194203064, "learning_rate": 1.1626515743132234e-05, "loss": 0.6087, "step": 3297 }, { "epoch": 0.46467065868263474, "grad_norm": 0.9560832470373398, "learning_rate": 1.1622012729853336e-05, "loss": 0.8147, "step": 3298 }, { "epoch": 0.46481155336386054, "grad_norm": 0.9513313579514378, "learning_rate": 1.1617509378764616e-05, "loss": 0.9163, "step": 3299 }, { "epoch": 0.4649524480450863, "grad_norm": 1.2780028889700314, "learning_rate": 1.1613005690803967e-05, "loss": 0.5367, "step": 3300 }, { "epoch": 0.4650933427263121, "grad_norm": 1.0528954381711042, "learning_rate": 1.1608501666909355e-05, "loss": 0.9561, "step": 3301 }, { "epoch": 0.4652342374075379, "grad_norm": 1.0313297663895276, "learning_rate": 1.1603997308018813e-05, "loss": 0.9335, "step": 3302 }, { "epoch": 0.46537513208876363, "grad_norm": 1.0508541739094532, "learning_rate": 1.1599492615070445e-05, "loss": 0.9289, "step": 3303 }, { "epoch": 0.46551602676998943, "grad_norm": 0.9884232951209809, "learning_rate": 1.1594987589002426e-05, "loss": 0.9162, "step": 3304 }, { "epoch": 0.46565692145121523, "grad_norm": 0.9520166621251164, "learning_rate": 1.1590482230752997e-05, "loss": 0.9044, "step": 3305 }, { "epoch": 0.465797816132441, "grad_norm": 1.2970405441728061, "learning_rate": 1.158597654126047e-05, "loss": 0.6957, "step": 3306 }, { "epoch": 0.4659387108136668, "grad_norm": 1.2354731828150751, "learning_rate": 1.1581470521463234e-05, "loss": 0.4757, "step": 3307 }, { "epoch": 0.4660796054948926, "grad_norm": 1.0428343105509617, "learning_rate": 1.1576964172299728e-05, "loss": 0.8854, "step": 3308 }, { "epoch": 0.4662205001761183, "grad_norm": 1.2612197954776065, "learning_rate": 1.1572457494708475e-05, "loss": 0.5074, "step": 3309 }, { "epoch": 0.4663613948573441, "grad_norm": 0.979013378573272, "learning_rate": 1.1567950489628061e-05, "loss": 0.9101, "step": 3310 }, { "epoch": 0.4665022895385699, "grad_norm": 1.3714688276462932, "learning_rate": 1.1563443157997143e-05, "loss": 0.4864, "step": 3311 }, { "epoch": 0.4666431842197957, "grad_norm": 0.9411287539100179, "learning_rate": 1.1558935500754445e-05, "loss": 0.9273, "step": 3312 }, { "epoch": 0.46678407890102147, "grad_norm": 1.0971754112526961, "learning_rate": 1.1554427518838756e-05, "loss": 0.9382, "step": 3313 }, { "epoch": 0.46692497358224727, "grad_norm": 1.3256730668056518, "learning_rate": 1.1549919213188934e-05, "loss": 0.6175, "step": 3314 }, { "epoch": 0.46706586826347307, "grad_norm": 1.2324321799470317, "learning_rate": 1.1545410584743903e-05, "loss": 0.4737, "step": 3315 }, { "epoch": 0.4672067629446988, "grad_norm": 0.8732830264188453, "learning_rate": 1.1540901634442662e-05, "loss": 0.8565, "step": 3316 }, { "epoch": 0.4673476576259246, "grad_norm": 1.1532590178053206, "learning_rate": 1.1536392363224269e-05, "loss": 0.4515, "step": 3317 }, { "epoch": 0.4674885523071504, "grad_norm": 1.358196705386106, "learning_rate": 1.1531882772027847e-05, "loss": 0.5428, "step": 3318 }, { "epoch": 0.46762944698837616, "grad_norm": 0.9625210234353997, "learning_rate": 1.1527372861792595e-05, "loss": 0.9052, "step": 3319 }, { "epoch": 0.46777034166960196, "grad_norm": 1.0117919097088521, "learning_rate": 1.1522862633457774e-05, "loss": 0.9018, "step": 3320 }, { "epoch": 0.46791123635082776, "grad_norm": 1.0540748372413324, "learning_rate": 1.1518352087962703e-05, "loss": 0.9066, "step": 3321 }, { "epoch": 0.46805213103205356, "grad_norm": 1.0009839765909887, "learning_rate": 1.1513841226246783e-05, "loss": 0.763, "step": 3322 }, { "epoch": 0.4681930257132793, "grad_norm": 1.3496673051408115, "learning_rate": 1.1509330049249466e-05, "loss": 0.4819, "step": 3323 }, { "epoch": 0.4683339203945051, "grad_norm": 1.118687738583538, "learning_rate": 1.150481855791028e-05, "loss": 0.8708, "step": 3324 }, { "epoch": 0.4684748150757309, "grad_norm": 1.138213197587316, "learning_rate": 1.1500306753168812e-05, "loss": 0.8928, "step": 3325 }, { "epoch": 0.46861570975695666, "grad_norm": 0.9339123224978535, "learning_rate": 1.1495794635964718e-05, "loss": 0.9079, "step": 3326 }, { "epoch": 0.46875660443818246, "grad_norm": 0.9617167392729244, "learning_rate": 1.1491282207237719e-05, "loss": 0.8593, "step": 3327 }, { "epoch": 0.46889749911940826, "grad_norm": 1.2703251724503166, "learning_rate": 1.1486769467927597e-05, "loss": 0.5062, "step": 3328 }, { "epoch": 0.469038393800634, "grad_norm": 1.0035593399765095, "learning_rate": 1.1482256418974205e-05, "loss": 0.832, "step": 3329 }, { "epoch": 0.4691792884818598, "grad_norm": 1.0387313609664905, "learning_rate": 1.147774306131745e-05, "loss": 0.8184, "step": 3330 }, { "epoch": 0.4693201831630856, "grad_norm": 1.0847721118447662, "learning_rate": 1.1473229395897316e-05, "loss": 0.8889, "step": 3331 }, { "epoch": 0.4694610778443114, "grad_norm": 1.0255508305482146, "learning_rate": 1.1468715423653847e-05, "loss": 0.9003, "step": 3332 }, { "epoch": 0.46960197252553715, "grad_norm": 1.0844872875566616, "learning_rate": 1.1464201145527142e-05, "loss": 0.866, "step": 3333 }, { "epoch": 0.46974286720676295, "grad_norm": 1.00535793864576, "learning_rate": 1.1459686562457377e-05, "loss": 0.8398, "step": 3334 }, { "epoch": 0.46988376188798875, "grad_norm": 1.2802500032796693, "learning_rate": 1.1455171675384786e-05, "loss": 0.5801, "step": 3335 }, { "epoch": 0.4700246565692145, "grad_norm": 1.0121798136045712, "learning_rate": 1.1450656485249661e-05, "loss": 0.888, "step": 3336 }, { "epoch": 0.4701655512504403, "grad_norm": 0.9056327342520714, "learning_rate": 1.1446140992992363e-05, "loss": 0.8423, "step": 3337 }, { "epoch": 0.4703064459316661, "grad_norm": 0.9736458876665381, "learning_rate": 1.1441625199553316e-05, "loss": 0.8618, "step": 3338 }, { "epoch": 0.47044734061289184, "grad_norm": 1.0460012837031634, "learning_rate": 1.1437109105873005e-05, "loss": 0.8616, "step": 3339 }, { "epoch": 0.47058823529411764, "grad_norm": 1.1913353367143058, "learning_rate": 1.143259271289198e-05, "loss": 0.8683, "step": 3340 }, { "epoch": 0.47072912997534344, "grad_norm": 1.1933098177596924, "learning_rate": 1.1428076021550848e-05, "loss": 0.5309, "step": 3341 }, { "epoch": 0.47087002465656924, "grad_norm": 1.173242020802507, "learning_rate": 1.1423559032790282e-05, "loss": 0.8979, "step": 3342 }, { "epoch": 0.471010919337795, "grad_norm": 1.0339260106331478, "learning_rate": 1.1419041747551019e-05, "loss": 0.8924, "step": 3343 }, { "epoch": 0.4711518140190208, "grad_norm": 1.1147323309417727, "learning_rate": 1.141452416677385e-05, "loss": 0.9076, "step": 3344 }, { "epoch": 0.4712927087002466, "grad_norm": 1.0412045887314232, "learning_rate": 1.141000629139964e-05, "loss": 0.831, "step": 3345 }, { "epoch": 0.47143360338147233, "grad_norm": 1.1885682956655874, "learning_rate": 1.1405488122369302e-05, "loss": 0.8871, "step": 3346 }, { "epoch": 0.47157449806269813, "grad_norm": 1.1690570512733676, "learning_rate": 1.1400969660623818e-05, "loss": 0.5045, "step": 3347 }, { "epoch": 0.47171539274392393, "grad_norm": 1.3055876482123472, "learning_rate": 1.139645090710423e-05, "loss": 0.5008, "step": 3348 }, { "epoch": 0.4718562874251497, "grad_norm": 1.1730213912266159, "learning_rate": 1.1391931862751639e-05, "loss": 0.8776, "step": 3349 }, { "epoch": 0.4719971821063755, "grad_norm": 1.313659909603107, "learning_rate": 1.1387412528507209e-05, "loss": 0.6494, "step": 3350 }, { "epoch": 0.4721380767876013, "grad_norm": 1.3355832998499282, "learning_rate": 1.138289290531216e-05, "loss": 0.5393, "step": 3351 }, { "epoch": 0.472278971468827, "grad_norm": 0.9728921674661386, "learning_rate": 1.1378372994107778e-05, "loss": 0.8528, "step": 3352 }, { "epoch": 0.4724198661500528, "grad_norm": 1.026257492586074, "learning_rate": 1.1373852795835404e-05, "loss": 0.8402, "step": 3353 }, { "epoch": 0.4725607608312786, "grad_norm": 1.0504698896019522, "learning_rate": 1.1369332311436443e-05, "loss": 0.8607, "step": 3354 }, { "epoch": 0.4727016555125044, "grad_norm": 1.0283457375396403, "learning_rate": 1.1364811541852357e-05, "loss": 0.8932, "step": 3355 }, { "epoch": 0.47284255019373017, "grad_norm": 0.98987398016421, "learning_rate": 1.1360290488024666e-05, "loss": 0.837, "step": 3356 }, { "epoch": 0.47298344487495597, "grad_norm": 0.9564497073219099, "learning_rate": 1.1355769150894951e-05, "loss": 0.857, "step": 3357 }, { "epoch": 0.47312433955618177, "grad_norm": 1.0098895907980776, "learning_rate": 1.1351247531404855e-05, "loss": 0.9054, "step": 3358 }, { "epoch": 0.4732652342374075, "grad_norm": 1.1882657358655124, "learning_rate": 1.1346725630496072e-05, "loss": 0.8488, "step": 3359 }, { "epoch": 0.4734061289186333, "grad_norm": 0.9758032920255482, "learning_rate": 1.1342203449110363e-05, "loss": 0.8717, "step": 3360 }, { "epoch": 0.4735470235998591, "grad_norm": 1.1243990620527242, "learning_rate": 1.1337680988189543e-05, "loss": 0.9333, "step": 3361 }, { "epoch": 0.47368791828108486, "grad_norm": 0.9552407634325707, "learning_rate": 1.1333158248675487e-05, "loss": 0.9209, "step": 3362 }, { "epoch": 0.47382881296231066, "grad_norm": 0.8999050712975586, "learning_rate": 1.1328635231510128e-05, "loss": 0.7907, "step": 3363 }, { "epoch": 0.47396970764353646, "grad_norm": 1.1174139818592035, "learning_rate": 1.1324111937635451e-05, "loss": 0.9123, "step": 3364 }, { "epoch": 0.47411060232476226, "grad_norm": 0.9405740285907878, "learning_rate": 1.1319588367993508e-05, "loss": 0.8953, "step": 3365 }, { "epoch": 0.474251497005988, "grad_norm": 0.9477833517718597, "learning_rate": 1.1315064523526405e-05, "loss": 0.8575, "step": 3366 }, { "epoch": 0.4743923916872138, "grad_norm": 1.304066283673843, "learning_rate": 1.1310540405176298e-05, "loss": 0.7212, "step": 3367 }, { "epoch": 0.4745332863684396, "grad_norm": 0.9184677861617285, "learning_rate": 1.1306016013885413e-05, "loss": 0.8428, "step": 3368 }, { "epoch": 0.47467418104966536, "grad_norm": 1.2471110499392437, "learning_rate": 1.1301491350596021e-05, "loss": 0.4894, "step": 3369 }, { "epoch": 0.47481507573089116, "grad_norm": 0.9442017123122343, "learning_rate": 1.1296966416250456e-05, "loss": 0.9072, "step": 3370 }, { "epoch": 0.47495597041211696, "grad_norm": 1.1142611538217233, "learning_rate": 1.1292441211791111e-05, "loss": 0.4833, "step": 3371 }, { "epoch": 0.4750968650933427, "grad_norm": 0.9991849353013817, "learning_rate": 1.1287915738160428e-05, "loss": 0.8835, "step": 3372 }, { "epoch": 0.4752377597745685, "grad_norm": 0.9754657398100225, "learning_rate": 1.1283389996300911e-05, "loss": 0.8971, "step": 3373 }, { "epoch": 0.4753786544557943, "grad_norm": 1.0077837952670754, "learning_rate": 1.127886398715511e-05, "loss": 0.8666, "step": 3374 }, { "epoch": 0.4755195491370201, "grad_norm": 0.8928495748066508, "learning_rate": 1.1274337711665648e-05, "loss": 0.8571, "step": 3375 }, { "epoch": 0.47566044381824585, "grad_norm": 0.9229049941573818, "learning_rate": 1.1269811170775188e-05, "loss": 0.8586, "step": 3376 }, { "epoch": 0.47580133849947165, "grad_norm": 1.221386602336441, "learning_rate": 1.1265284365426454e-05, "loss": 0.5223, "step": 3377 }, { "epoch": 0.47594223318069745, "grad_norm": 1.1955411241423612, "learning_rate": 1.1260757296562228e-05, "loss": 0.4829, "step": 3378 }, { "epoch": 0.4760831278619232, "grad_norm": 0.9569947490502302, "learning_rate": 1.1256229965125339e-05, "loss": 0.8431, "step": 3379 }, { "epoch": 0.476224022543149, "grad_norm": 1.005012232804637, "learning_rate": 1.1251702372058676e-05, "loss": 0.8977, "step": 3380 }, { "epoch": 0.4763649172243748, "grad_norm": 1.291522749369353, "learning_rate": 1.1247174518305188e-05, "loss": 0.5658, "step": 3381 }, { "epoch": 0.47650581190560054, "grad_norm": 1.449682706813548, "learning_rate": 1.1242646404807864e-05, "loss": 0.6247, "step": 3382 }, { "epoch": 0.47664670658682634, "grad_norm": 0.97819174224358, "learning_rate": 1.1238118032509764e-05, "loss": 0.8895, "step": 3383 }, { "epoch": 0.47678760126805214, "grad_norm": 0.9330988217858635, "learning_rate": 1.1233589402353986e-05, "loss": 0.8214, "step": 3384 }, { "epoch": 0.47692849594927794, "grad_norm": 1.0080826778142318, "learning_rate": 1.1229060515283689e-05, "loss": 0.8539, "step": 3385 }, { "epoch": 0.4770693906305037, "grad_norm": 0.8920599030062573, "learning_rate": 1.1224531372242094e-05, "loss": 0.8716, "step": 3386 }, { "epoch": 0.4772102853117295, "grad_norm": 1.0399603702841487, "learning_rate": 1.1220001974172454e-05, "loss": 0.9585, "step": 3387 }, { "epoch": 0.4773511799929553, "grad_norm": 1.2339655473683893, "learning_rate": 1.1215472322018096e-05, "loss": 0.8829, "step": 3388 }, { "epoch": 0.47749207467418103, "grad_norm": 1.3630968446480052, "learning_rate": 1.1210942416722393e-05, "loss": 0.5667, "step": 3389 }, { "epoch": 0.47763296935540683, "grad_norm": 0.9976603011574064, "learning_rate": 1.1206412259228761e-05, "loss": 0.8251, "step": 3390 }, { "epoch": 0.47777386403663263, "grad_norm": 0.9996527870797993, "learning_rate": 1.1201881850480685e-05, "loss": 0.852, "step": 3391 }, { "epoch": 0.4779147587178584, "grad_norm": 1.032273740493509, "learning_rate": 1.1197351191421696e-05, "loss": 0.8397, "step": 3392 }, { "epoch": 0.4780556533990842, "grad_norm": 1.0555362107891877, "learning_rate": 1.1192820282995365e-05, "loss": 0.8881, "step": 3393 }, { "epoch": 0.47819654808031, "grad_norm": 1.5061003655761938, "learning_rate": 1.1188289126145335e-05, "loss": 0.5369, "step": 3394 }, { "epoch": 0.4783374427615357, "grad_norm": 1.0975363147434634, "learning_rate": 1.1183757721815288e-05, "loss": 0.8867, "step": 3395 }, { "epoch": 0.4784783374427615, "grad_norm": 1.0565883731944026, "learning_rate": 1.117922607094896e-05, "loss": 0.8857, "step": 3396 }, { "epoch": 0.4786192321239873, "grad_norm": 0.9591270286088472, "learning_rate": 1.1174694174490137e-05, "loss": 0.8782, "step": 3397 }, { "epoch": 0.4787601268052131, "grad_norm": 1.101048307631573, "learning_rate": 1.1170162033382662e-05, "loss": 0.9001, "step": 3398 }, { "epoch": 0.47890102148643887, "grad_norm": 0.9618183883223477, "learning_rate": 1.1165629648570427e-05, "loss": 0.8634, "step": 3399 }, { "epoch": 0.47904191616766467, "grad_norm": 0.9696494650281503, "learning_rate": 1.1161097020997366e-05, "loss": 0.887, "step": 3400 }, { "epoch": 0.4791828108488905, "grad_norm": 1.012148417572028, "learning_rate": 1.1156564151607474e-05, "loss": 0.9155, "step": 3401 }, { "epoch": 0.4793237055301162, "grad_norm": 0.9667730109671797, "learning_rate": 1.1152031041344795e-05, "loss": 0.9764, "step": 3402 }, { "epoch": 0.479464600211342, "grad_norm": 0.9984510249293942, "learning_rate": 1.114749769115342e-05, "loss": 0.8229, "step": 3403 }, { "epoch": 0.4796054948925678, "grad_norm": 1.1721545632003307, "learning_rate": 1.1142964101977486e-05, "loss": 0.9378, "step": 3404 }, { "epoch": 0.47974638957379356, "grad_norm": 1.0158962207791382, "learning_rate": 1.113843027476119e-05, "loss": 0.8187, "step": 3405 }, { "epoch": 0.47988728425501936, "grad_norm": 1.2481150735127982, "learning_rate": 1.113389621044877e-05, "loss": 0.477, "step": 3406 }, { "epoch": 0.48002817893624516, "grad_norm": 1.2583546254451472, "learning_rate": 1.1129361909984521e-05, "loss": 0.5242, "step": 3407 }, { "epoch": 0.48016907361747097, "grad_norm": 0.9967568049317586, "learning_rate": 1.1124827374312781e-05, "loss": 0.8708, "step": 3408 }, { "epoch": 0.4803099682986967, "grad_norm": 1.074063268368198, "learning_rate": 1.1120292604377938e-05, "loss": 0.898, "step": 3409 }, { "epoch": 0.4804508629799225, "grad_norm": 0.9472391287793307, "learning_rate": 1.1115757601124426e-05, "loss": 0.8511, "step": 3410 }, { "epoch": 0.4805917576611483, "grad_norm": 1.035784955585442, "learning_rate": 1.1111222365496736e-05, "loss": 0.8838, "step": 3411 }, { "epoch": 0.48073265234237406, "grad_norm": 1.0144961768189449, "learning_rate": 1.1106686898439402e-05, "loss": 0.8888, "step": 3412 }, { "epoch": 0.48087354702359986, "grad_norm": 1.094330725018353, "learning_rate": 1.1102151200897007e-05, "loss": 0.8543, "step": 3413 }, { "epoch": 0.48101444170482566, "grad_norm": 1.0660562957166326, "learning_rate": 1.1097615273814183e-05, "loss": 0.8156, "step": 3414 }, { "epoch": 0.4811553363860514, "grad_norm": 1.2949410605417264, "learning_rate": 1.1093079118135604e-05, "loss": 0.542, "step": 3415 }, { "epoch": 0.4812962310672772, "grad_norm": 1.581546154635412, "learning_rate": 1.1088542734805997e-05, "loss": 0.5508, "step": 3416 }, { "epoch": 0.481437125748503, "grad_norm": 1.251381744968725, "learning_rate": 1.1084006124770141e-05, "loss": 0.9457, "step": 3417 }, { "epoch": 0.4815780204297288, "grad_norm": 0.8942297208273573, "learning_rate": 1.107946928897285e-05, "loss": 0.8323, "step": 3418 }, { "epoch": 0.48171891511095455, "grad_norm": 1.1571202616767873, "learning_rate": 1.1074932228358997e-05, "loss": 0.5352, "step": 3419 }, { "epoch": 0.48185980979218035, "grad_norm": 0.9582650829099042, "learning_rate": 1.1070394943873494e-05, "loss": 0.8397, "step": 3420 }, { "epoch": 0.48200070447340615, "grad_norm": 0.9985230921039455, "learning_rate": 1.1065857436461305e-05, "loss": 0.9231, "step": 3421 }, { "epoch": 0.4821415991546319, "grad_norm": 1.3048235864259399, "learning_rate": 1.1061319707067435e-05, "loss": 0.9623, "step": 3422 }, { "epoch": 0.4822824938358577, "grad_norm": 1.0557816328538148, "learning_rate": 1.105678175663694e-05, "loss": 0.8687, "step": 3423 }, { "epoch": 0.4824233885170835, "grad_norm": 1.270833184947008, "learning_rate": 1.1052243586114915e-05, "loss": 0.582, "step": 3424 }, { "epoch": 0.48256428319830924, "grad_norm": 0.9091867985928208, "learning_rate": 1.1047705196446515e-05, "loss": 0.9298, "step": 3425 }, { "epoch": 0.48270517787953504, "grad_norm": 0.9135985630439273, "learning_rate": 1.1043166588576923e-05, "loss": 0.8793, "step": 3426 }, { "epoch": 0.48284607256076084, "grad_norm": 1.2654500443063623, "learning_rate": 1.1038627763451377e-05, "loss": 0.5244, "step": 3427 }, { "epoch": 0.48298696724198664, "grad_norm": 1.024416767432027, "learning_rate": 1.1034088722015166e-05, "loss": 0.9052, "step": 3428 }, { "epoch": 0.4831278619232124, "grad_norm": 0.9938370901570851, "learning_rate": 1.102954946521361e-05, "loss": 0.8584, "step": 3429 }, { "epoch": 0.4832687566044382, "grad_norm": 1.0676072453035923, "learning_rate": 1.1025009993992084e-05, "loss": 0.9074, "step": 3430 }, { "epoch": 0.483409651285664, "grad_norm": 1.0378104257549576, "learning_rate": 1.1020470309296004e-05, "loss": 0.9302, "step": 3431 }, { "epoch": 0.48355054596688973, "grad_norm": 1.3365542875991667, "learning_rate": 1.1015930412070829e-05, "loss": 0.5669, "step": 3432 }, { "epoch": 0.48369144064811553, "grad_norm": 1.303552232143653, "learning_rate": 1.1011390303262071e-05, "loss": 0.559, "step": 3433 }, { "epoch": 0.48383233532934133, "grad_norm": 0.8367319424336306, "learning_rate": 1.1006849983815272e-05, "loss": 0.817, "step": 3434 }, { "epoch": 0.4839732300105671, "grad_norm": 0.9193058532264297, "learning_rate": 1.100230945467603e-05, "loss": 0.7934, "step": 3435 }, { "epoch": 0.4841141246917929, "grad_norm": 0.9573266973472724, "learning_rate": 1.099776871678998e-05, "loss": 0.9368, "step": 3436 }, { "epoch": 0.4842550193730187, "grad_norm": 1.048586237048028, "learning_rate": 1.0993227771102805e-05, "loss": 0.9399, "step": 3437 }, { "epoch": 0.4843959140542444, "grad_norm": 1.0107977341442986, "learning_rate": 1.0988686618560227e-05, "loss": 0.8869, "step": 3438 }, { "epoch": 0.4845368087354702, "grad_norm": 1.3037574295874454, "learning_rate": 1.0984145260108013e-05, "loss": 0.6452, "step": 3439 }, { "epoch": 0.484677703416696, "grad_norm": 1.338171657451662, "learning_rate": 1.0979603696691974e-05, "loss": 0.5929, "step": 3440 }, { "epoch": 0.4848185980979218, "grad_norm": 1.0673895691929787, "learning_rate": 1.0975061929257957e-05, "loss": 0.9053, "step": 3441 }, { "epoch": 0.48495949277914757, "grad_norm": 1.0139858756580409, "learning_rate": 1.0970519958751865e-05, "loss": 0.8808, "step": 3442 }, { "epoch": 0.4851003874603734, "grad_norm": 1.0346676473011756, "learning_rate": 1.096597778611963e-05, "loss": 0.9017, "step": 3443 }, { "epoch": 0.4852412821415992, "grad_norm": 1.3878006497381723, "learning_rate": 1.0961435412307233e-05, "loss": 0.5402, "step": 3444 }, { "epoch": 0.4853821768228249, "grad_norm": 0.9039335160451961, "learning_rate": 1.0956892838260696e-05, "loss": 0.8204, "step": 3445 }, { "epoch": 0.4855230715040507, "grad_norm": 1.020261771093649, "learning_rate": 1.095235006492608e-05, "loss": 0.8647, "step": 3446 }, { "epoch": 0.4856639661852765, "grad_norm": 1.1840314085607244, "learning_rate": 1.0947807093249489e-05, "loss": 0.8843, "step": 3447 }, { "epoch": 0.48580486086650226, "grad_norm": 1.0198366345699046, "learning_rate": 1.0943263924177074e-05, "loss": 0.8491, "step": 3448 }, { "epoch": 0.48594575554772806, "grad_norm": 1.1688636245781174, "learning_rate": 1.0938720558655016e-05, "loss": 0.4881, "step": 3449 }, { "epoch": 0.48608665022895386, "grad_norm": 0.9297242480141322, "learning_rate": 1.0934176997629544e-05, "loss": 0.8583, "step": 3450 }, { "epoch": 0.48622754491017967, "grad_norm": 1.0220414859210796, "learning_rate": 1.0929633242046929e-05, "loss": 0.8869, "step": 3451 }, { "epoch": 0.4863684395914054, "grad_norm": 0.9424414645132847, "learning_rate": 1.0925089292853478e-05, "loss": 0.8773, "step": 3452 }, { "epoch": 0.4865093342726312, "grad_norm": 0.8845092268889244, "learning_rate": 1.092054515099554e-05, "loss": 0.8306, "step": 3453 }, { "epoch": 0.486650228953857, "grad_norm": 1.1919310353574415, "learning_rate": 1.0916000817419505e-05, "loss": 0.4169, "step": 3454 }, { "epoch": 0.48679112363508276, "grad_norm": 0.969952330092433, "learning_rate": 1.09114562930718e-05, "loss": 0.8714, "step": 3455 }, { "epoch": 0.48693201831630856, "grad_norm": 0.949471162666802, "learning_rate": 1.0906911578898902e-05, "loss": 0.8432, "step": 3456 }, { "epoch": 0.48707291299753436, "grad_norm": 1.3951129371836393, "learning_rate": 1.0902366675847312e-05, "loss": 0.5358, "step": 3457 }, { "epoch": 0.4872138076787601, "grad_norm": 1.4273040886284776, "learning_rate": 1.0897821584863578e-05, "loss": 0.6086, "step": 3458 }, { "epoch": 0.4873547023599859, "grad_norm": 1.3531115030936764, "learning_rate": 1.0893276306894293e-05, "loss": 0.5058, "step": 3459 }, { "epoch": 0.4874955970412117, "grad_norm": 1.194163358417778, "learning_rate": 1.0888730842886077e-05, "loss": 0.9562, "step": 3460 }, { "epoch": 0.4876364917224375, "grad_norm": 1.0737350201093776, "learning_rate": 1.08841851937856e-05, "loss": 0.8716, "step": 3461 }, { "epoch": 0.48777738640366325, "grad_norm": 1.0434019453355379, "learning_rate": 1.087963936053956e-05, "loss": 0.9089, "step": 3462 }, { "epoch": 0.48791828108488905, "grad_norm": 1.0210214990921722, "learning_rate": 1.0875093344094703e-05, "loss": 0.8801, "step": 3463 }, { "epoch": 0.48805917576611485, "grad_norm": 1.1997538571097228, "learning_rate": 1.0870547145397805e-05, "loss": 0.5711, "step": 3464 }, { "epoch": 0.4882000704473406, "grad_norm": 1.4351821806348228, "learning_rate": 1.0866000765395687e-05, "loss": 0.5803, "step": 3465 }, { "epoch": 0.4883409651285664, "grad_norm": 1.1701416618484617, "learning_rate": 1.0861454205035206e-05, "loss": 0.6118, "step": 3466 }, { "epoch": 0.4884818598097922, "grad_norm": 1.1806604074295617, "learning_rate": 1.0856907465263253e-05, "loss": 0.9115, "step": 3467 }, { "epoch": 0.48862275449101794, "grad_norm": 0.9956905504757257, "learning_rate": 1.0852360547026754e-05, "loss": 0.9068, "step": 3468 }, { "epoch": 0.48876364917224374, "grad_norm": 1.2181371673302455, "learning_rate": 1.0847813451272686e-05, "loss": 0.6403, "step": 3469 }, { "epoch": 0.48890454385346954, "grad_norm": 1.0709733954562348, "learning_rate": 1.0843266178948047e-05, "loss": 0.8346, "step": 3470 }, { "epoch": 0.48904543853469534, "grad_norm": 1.1784379667220444, "learning_rate": 1.0838718730999878e-05, "loss": 0.9015, "step": 3471 }, { "epoch": 0.4891863332159211, "grad_norm": 1.0254293641892793, "learning_rate": 1.0834171108375263e-05, "loss": 0.8611, "step": 3472 }, { "epoch": 0.4893272278971469, "grad_norm": 1.264944698981424, "learning_rate": 1.0829623312021307e-05, "loss": 0.5991, "step": 3473 }, { "epoch": 0.4894681225783727, "grad_norm": 1.0425995236898866, "learning_rate": 1.0825075342885172e-05, "loss": 0.8866, "step": 3474 }, { "epoch": 0.48960901725959843, "grad_norm": 0.9230816389567164, "learning_rate": 1.0820527201914035e-05, "loss": 0.8725, "step": 3475 }, { "epoch": 0.48974991194082423, "grad_norm": 0.9904630158435644, "learning_rate": 1.081597889005512e-05, "loss": 0.8831, "step": 3476 }, { "epoch": 0.48989080662205003, "grad_norm": 1.2676426604872773, "learning_rate": 1.0811430408255693e-05, "loss": 0.4641, "step": 3477 }, { "epoch": 0.4900317013032758, "grad_norm": 1.0799755747515167, "learning_rate": 1.0806881757463036e-05, "loss": 0.849, "step": 3478 }, { "epoch": 0.4901725959845016, "grad_norm": 1.219868924697329, "learning_rate": 1.0802332938624487e-05, "loss": 0.4942, "step": 3479 }, { "epoch": 0.4903134906657274, "grad_norm": 1.3250086337340825, "learning_rate": 1.0797783952687402e-05, "loss": 0.5569, "step": 3480 }, { "epoch": 0.4904543853469531, "grad_norm": 0.9361311354028122, "learning_rate": 1.0793234800599182e-05, "loss": 0.9006, "step": 3481 }, { "epoch": 0.4905952800281789, "grad_norm": 1.1890465225676754, "learning_rate": 1.0788685483307263e-05, "loss": 0.5029, "step": 3482 }, { "epoch": 0.4907361747094047, "grad_norm": 1.0226341431694936, "learning_rate": 1.0784136001759109e-05, "loss": 0.8715, "step": 3483 }, { "epoch": 0.4908770693906305, "grad_norm": 1.1330036150798692, "learning_rate": 1.0779586356902222e-05, "loss": 0.9145, "step": 3484 }, { "epoch": 0.49101796407185627, "grad_norm": 1.0349523627306152, "learning_rate": 1.077503654968414e-05, "loss": 0.8595, "step": 3485 }, { "epoch": 0.4911588587530821, "grad_norm": 1.0236668626920962, "learning_rate": 1.0770486581052426e-05, "loss": 0.9281, "step": 3486 }, { "epoch": 0.4912997534343079, "grad_norm": 1.1220851811730035, "learning_rate": 1.076593645195469e-05, "loss": 0.9288, "step": 3487 }, { "epoch": 0.4914406481155336, "grad_norm": 0.9450700707383893, "learning_rate": 1.0761386163338566e-05, "loss": 0.8961, "step": 3488 }, { "epoch": 0.4915815427967594, "grad_norm": 0.9945593891620463, "learning_rate": 1.0756835716151723e-05, "loss": 0.8506, "step": 3489 }, { "epoch": 0.4917224374779852, "grad_norm": 0.9915905027335778, "learning_rate": 1.0752285111341862e-05, "loss": 0.8953, "step": 3490 }, { "epoch": 0.49186333215921096, "grad_norm": 0.9435584553317931, "learning_rate": 1.0747734349856722e-05, "loss": 0.907, "step": 3491 }, { "epoch": 0.49200422684043676, "grad_norm": 1.0725292820847379, "learning_rate": 1.0743183432644071e-05, "loss": 0.8856, "step": 3492 }, { "epoch": 0.49214512152166257, "grad_norm": 0.9046903192161492, "learning_rate": 1.0738632360651704e-05, "loss": 0.8908, "step": 3493 }, { "epoch": 0.49228601620288837, "grad_norm": 1.0412486953343107, "learning_rate": 1.0734081134827463e-05, "loss": 0.9178, "step": 3494 }, { "epoch": 0.4924269108841141, "grad_norm": 1.0780714502147823, "learning_rate": 1.0729529756119204e-05, "loss": 0.8809, "step": 3495 }, { "epoch": 0.4925678055653399, "grad_norm": 1.1554883917975507, "learning_rate": 1.0724978225474827e-05, "loss": 0.9489, "step": 3496 }, { "epoch": 0.4927087002465657, "grad_norm": 0.9728649403709536, "learning_rate": 1.0720426543842262e-05, "loss": 0.8842, "step": 3497 }, { "epoch": 0.49284959492779146, "grad_norm": 0.9175274919463475, "learning_rate": 1.0715874712169465e-05, "loss": 0.859, "step": 3498 }, { "epoch": 0.49299048960901726, "grad_norm": 1.3794496054822678, "learning_rate": 1.0711322731404434e-05, "loss": 0.6238, "step": 3499 }, { "epoch": 0.49313138429024306, "grad_norm": 0.9564654568248735, "learning_rate": 1.0706770602495183e-05, "loss": 0.8881, "step": 3500 }, { "epoch": 0.4932722789714688, "grad_norm": 1.2190599365014245, "learning_rate": 1.0702218326389767e-05, "loss": 0.5682, "step": 3501 }, { "epoch": 0.4934131736526946, "grad_norm": 1.2344407417583487, "learning_rate": 1.0697665904036274e-05, "loss": 0.9134, "step": 3502 }, { "epoch": 0.4935540683339204, "grad_norm": 1.0155735369392571, "learning_rate": 1.0693113336382813e-05, "loss": 0.8618, "step": 3503 }, { "epoch": 0.4936949630151462, "grad_norm": 0.999232611561479, "learning_rate": 1.0688560624377528e-05, "loss": 0.8958, "step": 3504 }, { "epoch": 0.49383585769637195, "grad_norm": 0.9778456780464772, "learning_rate": 1.0684007768968599e-05, "loss": 0.8555, "step": 3505 }, { "epoch": 0.49397675237759775, "grad_norm": 0.9605157606855574, "learning_rate": 1.0679454771104225e-05, "loss": 0.8921, "step": 3506 }, { "epoch": 0.49411764705882355, "grad_norm": 1.3551154999109556, "learning_rate": 1.0674901631732642e-05, "loss": 0.5306, "step": 3507 }, { "epoch": 0.4942585417400493, "grad_norm": 1.0635577243295615, "learning_rate": 1.067034835180211e-05, "loss": 0.8942, "step": 3508 }, { "epoch": 0.4943994364212751, "grad_norm": 1.0324674118151678, "learning_rate": 1.0665794932260929e-05, "loss": 0.917, "step": 3509 }, { "epoch": 0.4945403311025009, "grad_norm": 1.0405570477242356, "learning_rate": 1.0661241374057416e-05, "loss": 0.8564, "step": 3510 }, { "epoch": 0.49468122578372664, "grad_norm": 1.1082264669835524, "learning_rate": 1.0656687678139916e-05, "loss": 0.9251, "step": 3511 }, { "epoch": 0.49482212046495244, "grad_norm": 1.220272100979454, "learning_rate": 1.0652133845456818e-05, "loss": 0.6016, "step": 3512 }, { "epoch": 0.49496301514617824, "grad_norm": 1.0270315750514523, "learning_rate": 1.0647579876956526e-05, "loss": 0.8841, "step": 3513 }, { "epoch": 0.49510390982740404, "grad_norm": 0.9571057689973678, "learning_rate": 1.0643025773587472e-05, "loss": 0.8527, "step": 3514 }, { "epoch": 0.4952448045086298, "grad_norm": 0.9279609539321536, "learning_rate": 1.0638471536298127e-05, "loss": 0.4614, "step": 3515 }, { "epoch": 0.4953856991898556, "grad_norm": 0.9824917428744242, "learning_rate": 1.0633917166036977e-05, "loss": 0.9442, "step": 3516 }, { "epoch": 0.4955265938710814, "grad_norm": 1.1492783961090929, "learning_rate": 1.0629362663752547e-05, "loss": 0.4622, "step": 3517 }, { "epoch": 0.49566748855230713, "grad_norm": 1.4201465018293165, "learning_rate": 1.062480803039338e-05, "loss": 0.5916, "step": 3518 }, { "epoch": 0.49580838323353293, "grad_norm": 1.0827874033022629, "learning_rate": 1.0620253266908054e-05, "loss": 0.8763, "step": 3519 }, { "epoch": 0.49594927791475873, "grad_norm": 1.03843949469375, "learning_rate": 1.0615698374245167e-05, "loss": 0.8189, "step": 3520 }, { "epoch": 0.4960901725959845, "grad_norm": 0.9612412375665418, "learning_rate": 1.0611143353353348e-05, "loss": 0.8805, "step": 3521 }, { "epoch": 0.4962310672772103, "grad_norm": 0.8954873862014698, "learning_rate": 1.0606588205181255e-05, "loss": 0.9124, "step": 3522 }, { "epoch": 0.4963719619584361, "grad_norm": 0.8992895846648116, "learning_rate": 1.0602032930677568e-05, "loss": 0.8322, "step": 3523 }, { "epoch": 0.4965128566396618, "grad_norm": 1.1312007657238488, "learning_rate": 1.0597477530790994e-05, "loss": 0.4676, "step": 3524 }, { "epoch": 0.4966537513208876, "grad_norm": 1.022864648253621, "learning_rate": 1.059292200647027e-05, "loss": 0.8411, "step": 3525 }, { "epoch": 0.4967946460021134, "grad_norm": 0.9432172539540781, "learning_rate": 1.058836635866415e-05, "loss": 0.8963, "step": 3526 }, { "epoch": 0.4969355406833392, "grad_norm": 1.0197457471780242, "learning_rate": 1.0583810588321429e-05, "loss": 0.8891, "step": 3527 }, { "epoch": 0.497076435364565, "grad_norm": 1.0429497137855908, "learning_rate": 1.057925469639091e-05, "loss": 0.8777, "step": 3528 }, { "epoch": 0.4972173300457908, "grad_norm": 1.0484124000130557, "learning_rate": 1.0574698683821433e-05, "loss": 0.9353, "step": 3529 }, { "epoch": 0.4973582247270166, "grad_norm": 0.9443992863670672, "learning_rate": 1.0570142551561859e-05, "loss": 0.9241, "step": 3530 }, { "epoch": 0.4974991194082423, "grad_norm": 1.024799689537246, "learning_rate": 1.0565586300561079e-05, "loss": 0.8563, "step": 3531 }, { "epoch": 0.4976400140894681, "grad_norm": 0.9904256619345065, "learning_rate": 1.0561029931767996e-05, "loss": 0.8402, "step": 3532 }, { "epoch": 0.4977809087706939, "grad_norm": 1.4423783961989805, "learning_rate": 1.0556473446131554e-05, "loss": 0.6454, "step": 3533 }, { "epoch": 0.49792180345191966, "grad_norm": 1.1170943904072757, "learning_rate": 1.0551916844600708e-05, "loss": 0.8699, "step": 3534 }, { "epoch": 0.49806269813314547, "grad_norm": 1.126734107853681, "learning_rate": 1.0547360128124442e-05, "loss": 0.9033, "step": 3535 }, { "epoch": 0.49820359281437127, "grad_norm": 0.9468017019787, "learning_rate": 1.0542803297651772e-05, "loss": 0.8732, "step": 3536 }, { "epoch": 0.49834448749559707, "grad_norm": 0.9123163446720567, "learning_rate": 1.0538246354131718e-05, "loss": 0.8926, "step": 3537 }, { "epoch": 0.4984853821768228, "grad_norm": 0.9903847629161868, "learning_rate": 1.0533689298513347e-05, "loss": 0.8578, "step": 3538 }, { "epoch": 0.4986262768580486, "grad_norm": 1.112979569480225, "learning_rate": 1.0529132131745732e-05, "loss": 0.8661, "step": 3539 }, { "epoch": 0.4987671715392744, "grad_norm": 0.9831080185433222, "learning_rate": 1.0524574854777974e-05, "loss": 0.8689, "step": 3540 }, { "epoch": 0.49890806622050016, "grad_norm": 0.9268639188505947, "learning_rate": 1.0520017468559203e-05, "loss": 0.8409, "step": 3541 }, { "epoch": 0.49904896090172596, "grad_norm": 1.026030328163837, "learning_rate": 1.0515459974038562e-05, "loss": 0.8642, "step": 3542 }, { "epoch": 0.49918985558295176, "grad_norm": 1.0906501191722888, "learning_rate": 1.0510902372165227e-05, "loss": 0.8448, "step": 3543 }, { "epoch": 0.4993307502641775, "grad_norm": 0.943616217696132, "learning_rate": 1.0506344663888381e-05, "loss": 0.8514, "step": 3544 }, { "epoch": 0.4994716449454033, "grad_norm": 1.4056650186418025, "learning_rate": 1.0501786850157247e-05, "loss": 0.4592, "step": 3545 }, { "epoch": 0.4996125396266291, "grad_norm": 1.02652485599591, "learning_rate": 1.0497228931921061e-05, "loss": 0.8939, "step": 3546 }, { "epoch": 0.4997534343078549, "grad_norm": 1.3743142887301862, "learning_rate": 1.0492670910129082e-05, "loss": 0.6846, "step": 3547 }, { "epoch": 0.49989432898908065, "grad_norm": 0.9961271886561408, "learning_rate": 1.048811278573059e-05, "loss": 0.8803, "step": 3548 }, { "epoch": 0.5000352236703064, "grad_norm": 1.1070707210153334, "learning_rate": 1.048355455967488e-05, "loss": 0.4675, "step": 3549 }, { "epoch": 0.5001761183515322, "grad_norm": 1.061162111432137, "learning_rate": 1.047899623291128e-05, "loss": 0.8503, "step": 3550 }, { "epoch": 0.500317013032758, "grad_norm": 0.965875955631934, "learning_rate": 1.0474437806389139e-05, "loss": 0.8853, "step": 3551 }, { "epoch": 0.5004579077139838, "grad_norm": 0.9425797656532743, "learning_rate": 1.0469879281057813e-05, "loss": 0.857, "step": 3552 }, { "epoch": 0.5005988023952096, "grad_norm": 1.0004890293993762, "learning_rate": 1.046532065786669e-05, "loss": 0.8713, "step": 3553 }, { "epoch": 0.5007396970764354, "grad_norm": 1.0422539306457779, "learning_rate": 1.0460761937765177e-05, "loss": 0.8774, "step": 3554 }, { "epoch": 0.5008805917576612, "grad_norm": 1.1777179035990777, "learning_rate": 1.0456203121702693e-05, "loss": 0.8542, "step": 3555 }, { "epoch": 0.5010214864388869, "grad_norm": 1.1272953164930928, "learning_rate": 1.0451644210628693e-05, "loss": 0.8647, "step": 3556 }, { "epoch": 0.5011623811201127, "grad_norm": 1.3399520921953485, "learning_rate": 1.0447085205492637e-05, "loss": 0.6475, "step": 3557 }, { "epoch": 0.5013032758013385, "grad_norm": 1.1046867417812984, "learning_rate": 1.044252610724401e-05, "loss": 0.8972, "step": 3558 }, { "epoch": 0.5014441704825643, "grad_norm": 1.2933699718137759, "learning_rate": 1.043796691683232e-05, "loss": 0.5205, "step": 3559 }, { "epoch": 0.5015850651637901, "grad_norm": 1.2397754951237956, "learning_rate": 1.0433407635207087e-05, "loss": 0.5416, "step": 3560 }, { "epoch": 0.5017259598450159, "grad_norm": 1.0592973362218294, "learning_rate": 1.0428848263317854e-05, "loss": 0.8402, "step": 3561 }, { "epoch": 0.5018668545262416, "grad_norm": 1.027408958191889, "learning_rate": 1.0424288802114184e-05, "loss": 0.8835, "step": 3562 }, { "epoch": 0.5020077492074674, "grad_norm": 1.327443641600216, "learning_rate": 1.0419729252545654e-05, "loss": 0.5492, "step": 3563 }, { "epoch": 0.5021486438886932, "grad_norm": 0.9911728970535603, "learning_rate": 1.0415169615561867e-05, "loss": 0.8879, "step": 3564 }, { "epoch": 0.502289538569919, "grad_norm": 1.3063406193596059, "learning_rate": 1.0410609892112435e-05, "loss": 0.5041, "step": 3565 }, { "epoch": 0.5024304332511448, "grad_norm": 1.2315724486331336, "learning_rate": 1.0406050083146995e-05, "loss": 0.8806, "step": 3566 }, { "epoch": 0.5025713279323706, "grad_norm": 1.0798307620549694, "learning_rate": 1.0401490189615198e-05, "loss": 0.8972, "step": 3567 }, { "epoch": 0.5027122226135964, "grad_norm": 1.0231395645290506, "learning_rate": 1.0396930212466715e-05, "loss": 0.8228, "step": 3568 }, { "epoch": 0.5028531172948221, "grad_norm": 1.0391979532620772, "learning_rate": 1.0392370152651234e-05, "loss": 0.9025, "step": 3569 }, { "epoch": 0.5029940119760479, "grad_norm": 1.0351929448659767, "learning_rate": 1.0387810011118457e-05, "loss": 0.8411, "step": 3570 }, { "epoch": 0.5031349066572737, "grad_norm": 1.2575599917420488, "learning_rate": 1.0383249788818109e-05, "loss": 0.5865, "step": 3571 }, { "epoch": 0.5032758013384995, "grad_norm": 1.0568501288878605, "learning_rate": 1.0378689486699927e-05, "loss": 0.9396, "step": 3572 }, { "epoch": 0.5034166960197253, "grad_norm": 1.3667027519399104, "learning_rate": 1.0374129105713665e-05, "loss": 0.6558, "step": 3573 }, { "epoch": 0.5035575907009511, "grad_norm": 1.0742020972439636, "learning_rate": 1.0369568646809096e-05, "loss": 0.8525, "step": 3574 }, { "epoch": 0.5036984853821769, "grad_norm": 1.2074136008289569, "learning_rate": 1.0365008110936009e-05, "loss": 0.9078, "step": 3575 }, { "epoch": 0.5038393800634026, "grad_norm": 1.0569950692220789, "learning_rate": 1.0360447499044201e-05, "loss": 0.8773, "step": 3576 }, { "epoch": 0.5039802747446284, "grad_norm": 0.9856189324944866, "learning_rate": 1.0355886812083502e-05, "loss": 0.7777, "step": 3577 }, { "epoch": 0.5041211694258542, "grad_norm": 0.942249580572161, "learning_rate": 1.0351326051003738e-05, "loss": 0.888, "step": 3578 }, { "epoch": 0.50426206410708, "grad_norm": 1.0151797653437207, "learning_rate": 1.0346765216754765e-05, "loss": 0.8626, "step": 3579 }, { "epoch": 0.5044029587883058, "grad_norm": 1.0679026968268883, "learning_rate": 1.0342204310286448e-05, "loss": 0.8528, "step": 3580 }, { "epoch": 0.5045438534695316, "grad_norm": 1.1445125501569469, "learning_rate": 1.0337643332548667e-05, "loss": 0.9338, "step": 3581 }, { "epoch": 0.5046847481507573, "grad_norm": 1.3456930833350957, "learning_rate": 1.033308228449132e-05, "loss": 0.536, "step": 3582 }, { "epoch": 0.5048256428319831, "grad_norm": 1.0948168501890996, "learning_rate": 1.0328521167064313e-05, "loss": 0.8648, "step": 3583 }, { "epoch": 0.5049665375132089, "grad_norm": 0.9472863560177295, "learning_rate": 1.0323959981217575e-05, "loss": 0.8636, "step": 3584 }, { "epoch": 0.5051074321944347, "grad_norm": 1.289937661984628, "learning_rate": 1.0319398727901044e-05, "loss": 0.506, "step": 3585 }, { "epoch": 0.5052483268756605, "grad_norm": 1.1805218403393036, "learning_rate": 1.0314837408064671e-05, "loss": 0.5258, "step": 3586 }, { "epoch": 0.5053892215568863, "grad_norm": 0.9819215291913431, "learning_rate": 1.0310276022658427e-05, "loss": 0.8609, "step": 3587 }, { "epoch": 0.5055301162381121, "grad_norm": 1.278285653452631, "learning_rate": 1.030571457263229e-05, "loss": 0.484, "step": 3588 }, { "epoch": 0.5056710109193377, "grad_norm": 1.2607687889166819, "learning_rate": 1.0301153058936253e-05, "loss": 0.5499, "step": 3589 }, { "epoch": 0.5058119056005635, "grad_norm": 1.3643900058525298, "learning_rate": 1.0296591482520329e-05, "loss": 0.457, "step": 3590 }, { "epoch": 0.5059528002817893, "grad_norm": 0.9914359970773363, "learning_rate": 1.029202984433453e-05, "loss": 0.8896, "step": 3591 }, { "epoch": 0.5060936949630152, "grad_norm": 0.9589458442135633, "learning_rate": 1.0287468145328899e-05, "loss": 0.848, "step": 3592 }, { "epoch": 0.506234589644241, "grad_norm": 1.0378903794064047, "learning_rate": 1.0282906386453476e-05, "loss": 0.8211, "step": 3593 }, { "epoch": 0.5063754843254668, "grad_norm": 1.4897301121967632, "learning_rate": 1.0278344568658318e-05, "loss": 0.6411, "step": 3594 }, { "epoch": 0.5065163790066926, "grad_norm": 1.094257502924287, "learning_rate": 1.0273782692893502e-05, "loss": 0.8981, "step": 3595 }, { "epoch": 0.5066572736879182, "grad_norm": 0.9965141616482832, "learning_rate": 1.0269220760109103e-05, "loss": 0.892, "step": 3596 }, { "epoch": 0.506798168369144, "grad_norm": 1.2207278415645524, "learning_rate": 1.026465877125522e-05, "loss": 0.5819, "step": 3597 }, { "epoch": 0.5069390630503698, "grad_norm": 0.9416479912370506, "learning_rate": 1.0260096727281963e-05, "loss": 0.8577, "step": 3598 }, { "epoch": 0.5070799577315956, "grad_norm": 1.25585067579555, "learning_rate": 1.0255534629139442e-05, "loss": 0.5129, "step": 3599 }, { "epoch": 0.5072208524128214, "grad_norm": 1.2754545837805276, "learning_rate": 1.0250972477777795e-05, "loss": 0.5343, "step": 3600 }, { "epoch": 0.5073617470940472, "grad_norm": 1.005248255309822, "learning_rate": 1.0246410274147151e-05, "loss": 0.8196, "step": 3601 }, { "epoch": 0.5075026417752729, "grad_norm": 1.0524875593527825, "learning_rate": 1.0241848019197672e-05, "loss": 0.9007, "step": 3602 }, { "epoch": 0.5076435364564987, "grad_norm": 0.9469938368941644, "learning_rate": 1.0237285713879515e-05, "loss": 0.8493, "step": 3603 }, { "epoch": 0.5077844311377245, "grad_norm": 0.9526635800648449, "learning_rate": 1.0232723359142851e-05, "loss": 0.9119, "step": 3604 }, { "epoch": 0.5079253258189503, "grad_norm": 1.0080475273160792, "learning_rate": 1.0228160955937868e-05, "loss": 0.923, "step": 3605 }, { "epoch": 0.5080662205001761, "grad_norm": 0.9057573106596774, "learning_rate": 1.0223598505214749e-05, "loss": 0.8883, "step": 3606 }, { "epoch": 0.5082071151814019, "grad_norm": 0.9576736940459387, "learning_rate": 1.0219036007923705e-05, "loss": 0.8376, "step": 3607 }, { "epoch": 0.5083480098626277, "grad_norm": 1.37565110503957, "learning_rate": 1.0214473465014948e-05, "loss": 0.5374, "step": 3608 }, { "epoch": 0.5084889045438534, "grad_norm": 0.9899299362782373, "learning_rate": 1.0209910877438695e-05, "loss": 0.9038, "step": 3609 }, { "epoch": 0.5086297992250792, "grad_norm": 1.2341944552439945, "learning_rate": 1.0205348246145181e-05, "loss": 0.5041, "step": 3610 }, { "epoch": 0.508770693906305, "grad_norm": 1.2401048473579646, "learning_rate": 1.0200785572084644e-05, "loss": 0.5832, "step": 3611 }, { "epoch": 0.5089115885875308, "grad_norm": 0.9972174010481737, "learning_rate": 1.0196222856207335e-05, "loss": 0.9033, "step": 3612 }, { "epoch": 0.5090524832687566, "grad_norm": 1.3457627914109138, "learning_rate": 1.019166009946351e-05, "loss": 0.5149, "step": 3613 }, { "epoch": 0.5091933779499824, "grad_norm": 0.9581829038351689, "learning_rate": 1.0187097302803434e-05, "loss": 0.8836, "step": 3614 }, { "epoch": 0.5093342726312081, "grad_norm": 0.9159182661293053, "learning_rate": 1.0182534467177386e-05, "loss": 0.8325, "step": 3615 }, { "epoch": 0.5094751673124339, "grad_norm": 0.9767229172694284, "learning_rate": 1.0177971593535645e-05, "loss": 0.8147, "step": 3616 }, { "epoch": 0.5096160619936597, "grad_norm": 1.3939629811176455, "learning_rate": 1.0173408682828502e-05, "loss": 0.5917, "step": 3617 }, { "epoch": 0.5097569566748855, "grad_norm": 1.2827027667811972, "learning_rate": 1.0168845736006259e-05, "loss": 0.5236, "step": 3618 }, { "epoch": 0.5098978513561113, "grad_norm": 0.9540927235523082, "learning_rate": 1.0164282754019217e-05, "loss": 0.8134, "step": 3619 }, { "epoch": 0.5100387460373371, "grad_norm": 1.0509352759897974, "learning_rate": 1.0159719737817692e-05, "loss": 0.8504, "step": 3620 }, { "epoch": 0.5101796407185629, "grad_norm": 0.9656164423559007, "learning_rate": 1.0155156688352006e-05, "loss": 0.8065, "step": 3621 }, { "epoch": 0.5103205353997886, "grad_norm": 1.452578592395728, "learning_rate": 1.015059360657248e-05, "loss": 0.4308, "step": 3622 }, { "epoch": 0.5104614300810144, "grad_norm": 1.3287836920483131, "learning_rate": 1.0146030493429458e-05, "loss": 0.535, "step": 3623 }, { "epoch": 0.5106023247622402, "grad_norm": 1.1389516007423124, "learning_rate": 1.0141467349873268e-05, "loss": 0.9556, "step": 3624 }, { "epoch": 0.510743219443466, "grad_norm": 0.8668501045614508, "learning_rate": 1.0136904176854266e-05, "loss": 0.8394, "step": 3625 }, { "epoch": 0.5108841141246918, "grad_norm": 0.9884404099354429, "learning_rate": 1.0132340975322803e-05, "loss": 0.937, "step": 3626 }, { "epoch": 0.5110250088059176, "grad_norm": 0.8907634274544365, "learning_rate": 1.0127777746229237e-05, "loss": 0.8216, "step": 3627 }, { "epoch": 0.5111659034871434, "grad_norm": 1.1939377998779506, "learning_rate": 1.0123214490523934e-05, "loss": 0.8913, "step": 3628 }, { "epoch": 0.5113067981683691, "grad_norm": 1.1221225303562976, "learning_rate": 1.0118651209157262e-05, "loss": 0.478, "step": 3629 }, { "epoch": 0.5114476928495949, "grad_norm": 0.9754406077682335, "learning_rate": 1.0114087903079598e-05, "loss": 0.8727, "step": 3630 }, { "epoch": 0.5115885875308207, "grad_norm": 0.910037494279695, "learning_rate": 1.0109524573241324e-05, "loss": 0.9017, "step": 3631 }, { "epoch": 0.5117294822120465, "grad_norm": 1.2162267787303338, "learning_rate": 1.0104961220592822e-05, "loss": 0.5311, "step": 3632 }, { "epoch": 0.5118703768932723, "grad_norm": 0.9663039953733907, "learning_rate": 1.0100397846084486e-05, "loss": 0.9337, "step": 3633 }, { "epoch": 0.5120112715744981, "grad_norm": 0.9246239308082554, "learning_rate": 1.0095834450666707e-05, "loss": 0.8715, "step": 3634 }, { "epoch": 0.5121521662557238, "grad_norm": 0.9732828244119013, "learning_rate": 1.0091271035289891e-05, "loss": 0.8399, "step": 3635 }, { "epoch": 0.5122930609369496, "grad_norm": 1.1036330341854206, "learning_rate": 1.0086707600904438e-05, "loss": 0.8823, "step": 3636 }, { "epoch": 0.5124339556181754, "grad_norm": 0.9541910069008019, "learning_rate": 1.008214414846075e-05, "loss": 0.9114, "step": 3637 }, { "epoch": 0.5125748502994012, "grad_norm": 1.2952102529772354, "learning_rate": 1.0077580678909245e-05, "loss": 0.5985, "step": 3638 }, { "epoch": 0.512715744980627, "grad_norm": 0.8976615385999495, "learning_rate": 1.0073017193200336e-05, "loss": 0.8932, "step": 3639 }, { "epoch": 0.5128566396618528, "grad_norm": 0.9598189393976109, "learning_rate": 1.006845369228444e-05, "loss": 0.8826, "step": 3640 }, { "epoch": 0.5129975343430786, "grad_norm": 1.152677982492216, "learning_rate": 1.0063890177111978e-05, "loss": 0.8549, "step": 3641 }, { "epoch": 0.5131384290243043, "grad_norm": 0.9891897170800983, "learning_rate": 1.005932664863338e-05, "loss": 0.8098, "step": 3642 }, { "epoch": 0.5132793237055301, "grad_norm": 0.9630176536630253, "learning_rate": 1.0054763107799065e-05, "loss": 0.8953, "step": 3643 }, { "epoch": 0.5134202183867559, "grad_norm": 1.0095525001851358, "learning_rate": 1.005019955555947e-05, "loss": 0.8378, "step": 3644 }, { "epoch": 0.5135611130679817, "grad_norm": 0.9567534533722751, "learning_rate": 1.0045635992865019e-05, "loss": 0.8891, "step": 3645 }, { "epoch": 0.5137020077492075, "grad_norm": 0.988780617095774, "learning_rate": 1.004107242066615e-05, "loss": 0.825, "step": 3646 }, { "epoch": 0.5138429024304333, "grad_norm": 1.377519944731037, "learning_rate": 1.0036508839913301e-05, "loss": 0.5369, "step": 3647 }, { "epoch": 0.513983797111659, "grad_norm": 0.9843478816739749, "learning_rate": 1.003194525155691e-05, "loss": 0.8866, "step": 3648 }, { "epoch": 0.5141246917928848, "grad_norm": 1.4058023904968802, "learning_rate": 1.0027381656547412e-05, "loss": 0.5894, "step": 3649 }, { "epoch": 0.5142655864741106, "grad_norm": 1.646810611437301, "learning_rate": 1.002281805583525e-05, "loss": 0.7004, "step": 3650 }, { "epoch": 0.5144064811553364, "grad_norm": 1.1971334477761761, "learning_rate": 1.0018254450370868e-05, "loss": 0.4962, "step": 3651 }, { "epoch": 0.5145473758365622, "grad_norm": 1.0501691033415401, "learning_rate": 1.001369084110471e-05, "loss": 0.8227, "step": 3652 }, { "epoch": 0.514688270517788, "grad_norm": 1.2526353042161336, "learning_rate": 1.0009127228987215e-05, "loss": 0.8444, "step": 3653 }, { "epoch": 0.5148291651990138, "grad_norm": 1.045125014341088, "learning_rate": 1.0004563614968831e-05, "loss": 0.8384, "step": 3654 }, { "epoch": 0.5149700598802395, "grad_norm": 1.0210241805220175, "learning_rate": 1e-05, "loss": 0.9464, "step": 3655 }, { "epoch": 0.5151109545614653, "grad_norm": 1.2817022032131133, "learning_rate": 9.995436385031174e-06, "loss": 0.547, "step": 3656 }, { "epoch": 0.5152518492426911, "grad_norm": 0.9781884449071528, "learning_rate": 9.990872771012788e-06, "loss": 0.9042, "step": 3657 }, { "epoch": 0.5153927439239169, "grad_norm": 0.9782180509526351, "learning_rate": 9.986309158895296e-06, "loss": 0.8877, "step": 3658 }, { "epoch": 0.5155336386051427, "grad_norm": 1.0704934618141277, "learning_rate": 9.981745549629135e-06, "loss": 0.9125, "step": 3659 }, { "epoch": 0.5156745332863685, "grad_norm": 1.0203128140583007, "learning_rate": 9.977181944164754e-06, "loss": 0.8723, "step": 3660 }, { "epoch": 0.5158154279675943, "grad_norm": 1.0775472060645341, "learning_rate": 9.972618343452591e-06, "loss": 0.9472, "step": 3661 }, { "epoch": 0.51595632264882, "grad_norm": 1.2695547125006599, "learning_rate": 9.968054748443097e-06, "loss": 0.4783, "step": 3662 }, { "epoch": 0.5160972173300458, "grad_norm": 1.0598896477246154, "learning_rate": 9.9634911600867e-06, "loss": 0.8811, "step": 3663 }, { "epoch": 0.5162381120112716, "grad_norm": 1.1783422450813323, "learning_rate": 9.958927579333852e-06, "loss": 0.5518, "step": 3664 }, { "epoch": 0.5163790066924974, "grad_norm": 1.371431838431754, "learning_rate": 9.954364007134984e-06, "loss": 0.5677, "step": 3665 }, { "epoch": 0.5165199013737232, "grad_norm": 1.0027466353430448, "learning_rate": 9.949800444440536e-06, "loss": 0.8835, "step": 3666 }, { "epoch": 0.516660796054949, "grad_norm": 0.9401751353039649, "learning_rate": 9.945236892200936e-06, "loss": 0.8919, "step": 3667 }, { "epoch": 0.5168016907361747, "grad_norm": 1.2220098747766714, "learning_rate": 9.940673351366625e-06, "loss": 0.5123, "step": 3668 }, { "epoch": 0.5169425854174005, "grad_norm": 0.9767204015406764, "learning_rate": 9.936109822888023e-06, "loss": 0.8509, "step": 3669 }, { "epoch": 0.5170834800986263, "grad_norm": 0.9370175852351798, "learning_rate": 9.931546307715564e-06, "loss": 0.8601, "step": 3670 }, { "epoch": 0.5172243747798521, "grad_norm": 1.2307634836240617, "learning_rate": 9.926982806799666e-06, "loss": 0.4673, "step": 3671 }, { "epoch": 0.5173652694610779, "grad_norm": 1.2802797091361737, "learning_rate": 9.922419321090758e-06, "loss": 0.5019, "step": 3672 }, { "epoch": 0.5175061641423037, "grad_norm": 1.1261112490482275, "learning_rate": 9.917855851539253e-06, "loss": 0.4836, "step": 3673 }, { "epoch": 0.5176470588235295, "grad_norm": 1.013532322311488, "learning_rate": 9.913292399095569e-06, "loss": 0.8645, "step": 3674 }, { "epoch": 0.5177879535047551, "grad_norm": 1.0572581502441187, "learning_rate": 9.908728964710112e-06, "loss": 0.8987, "step": 3675 }, { "epoch": 0.517928848185981, "grad_norm": 1.333798264924761, "learning_rate": 9.904165549333296e-06, "loss": 0.5677, "step": 3676 }, { "epoch": 0.5180697428672068, "grad_norm": 1.0000483521324042, "learning_rate": 9.899602153915517e-06, "loss": 0.8788, "step": 3677 }, { "epoch": 0.5182106375484326, "grad_norm": 1.0765165007599171, "learning_rate": 9.895038779407182e-06, "loss": 0.8974, "step": 3678 }, { "epoch": 0.5183515322296584, "grad_norm": 1.010884026861272, "learning_rate": 9.890475426758678e-06, "loss": 0.8909, "step": 3679 }, { "epoch": 0.5184924269108842, "grad_norm": 0.8816742348750046, "learning_rate": 9.885912096920406e-06, "loss": 0.8711, "step": 3680 }, { "epoch": 0.51863332159211, "grad_norm": 1.0248681919835074, "learning_rate": 9.88134879084274e-06, "loss": 0.8172, "step": 3681 }, { "epoch": 0.5187742162733356, "grad_norm": 1.2470286988149428, "learning_rate": 9.87678550947607e-06, "loss": 0.4296, "step": 3682 }, { "epoch": 0.5189151109545614, "grad_norm": 1.3107769113496166, "learning_rate": 9.872222253770765e-06, "loss": 0.5785, "step": 3683 }, { "epoch": 0.5190560056357872, "grad_norm": 1.1116206855519624, "learning_rate": 9.8676590246772e-06, "loss": 0.9466, "step": 3684 }, { "epoch": 0.519196900317013, "grad_norm": 1.0102571659417767, "learning_rate": 9.863095823145736e-06, "loss": 0.8302, "step": 3685 }, { "epoch": 0.5193377949982388, "grad_norm": 1.5045725010724953, "learning_rate": 9.858532650126735e-06, "loss": 0.5838, "step": 3686 }, { "epoch": 0.5194786896794646, "grad_norm": 1.312762275403731, "learning_rate": 9.853969506570547e-06, "loss": 0.698, "step": 3687 }, { "epoch": 0.5196195843606903, "grad_norm": 1.0609218034179728, "learning_rate": 9.849406393427521e-06, "loss": 0.8838, "step": 3688 }, { "epoch": 0.5197604790419161, "grad_norm": 1.354004336642371, "learning_rate": 9.844843311647994e-06, "loss": 0.5687, "step": 3689 }, { "epoch": 0.5199013737231419, "grad_norm": 1.137451734337601, "learning_rate": 9.84028026218231e-06, "loss": 0.4426, "step": 3690 }, { "epoch": 0.5200422684043677, "grad_norm": 0.9981175911314719, "learning_rate": 9.835717245980783e-06, "loss": 0.898, "step": 3691 }, { "epoch": 0.5201831630855935, "grad_norm": 1.2795139576702528, "learning_rate": 9.831154263993743e-06, "loss": 0.559, "step": 3692 }, { "epoch": 0.5203240577668193, "grad_norm": 0.9496884488963173, "learning_rate": 9.826591317171498e-06, "loss": 0.8635, "step": 3693 }, { "epoch": 0.5204649524480451, "grad_norm": 0.944332792300532, "learning_rate": 9.822028406464357e-06, "loss": 0.854, "step": 3694 }, { "epoch": 0.5206058471292708, "grad_norm": 0.8585908348034663, "learning_rate": 9.817465532822616e-06, "loss": 0.8223, "step": 3695 }, { "epoch": 0.5207467418104966, "grad_norm": 1.0572459070434623, "learning_rate": 9.812902697196569e-06, "loss": 0.845, "step": 3696 }, { "epoch": 0.5208876364917224, "grad_norm": 1.3405148288518993, "learning_rate": 9.808339900536495e-06, "loss": 0.5692, "step": 3697 }, { "epoch": 0.5210285311729482, "grad_norm": 1.3035322413687327, "learning_rate": 9.80377714379267e-06, "loss": 0.5886, "step": 3698 }, { "epoch": 0.521169425854174, "grad_norm": 1.0007778495467285, "learning_rate": 9.79921442791536e-06, "loss": 0.9366, "step": 3699 }, { "epoch": 0.5213103205353998, "grad_norm": 1.3162735305725588, "learning_rate": 9.79465175385482e-06, "loss": 0.5589, "step": 3700 }, { "epoch": 0.5214512152166255, "grad_norm": 0.9056692976700176, "learning_rate": 9.79008912256131e-06, "loss": 0.8633, "step": 3701 }, { "epoch": 0.5215921098978513, "grad_norm": 0.9328728481077637, "learning_rate": 9.785526534985055e-06, "loss": 0.8608, "step": 3702 }, { "epoch": 0.5217330045790771, "grad_norm": 0.9699305545117862, "learning_rate": 9.780963992076296e-06, "loss": 0.8168, "step": 3703 }, { "epoch": 0.5218738992603029, "grad_norm": 1.2920129924548156, "learning_rate": 9.776401494785253e-06, "loss": 0.566, "step": 3704 }, { "epoch": 0.5220147939415287, "grad_norm": 1.0983905093760127, "learning_rate": 9.771839044062137e-06, "loss": 0.8664, "step": 3705 }, { "epoch": 0.5221556886227545, "grad_norm": 0.9787595446180914, "learning_rate": 9.767276640857152e-06, "loss": 0.8202, "step": 3706 }, { "epoch": 0.5222965833039803, "grad_norm": 0.9916554167667317, "learning_rate": 9.76271428612049e-06, "loss": 0.8385, "step": 3707 }, { "epoch": 0.522437477985206, "grad_norm": 1.2140090238698218, "learning_rate": 9.758151980802332e-06, "loss": 0.4248, "step": 3708 }, { "epoch": 0.5225783726664318, "grad_norm": 1.3302711414536097, "learning_rate": 9.753589725852852e-06, "loss": 0.4569, "step": 3709 }, { "epoch": 0.5227192673476576, "grad_norm": 1.0099155010318024, "learning_rate": 9.749027522222209e-06, "loss": 0.8848, "step": 3710 }, { "epoch": 0.5228601620288834, "grad_norm": 0.8743561791392057, "learning_rate": 9.744465370860563e-06, "loss": 0.862, "step": 3711 }, { "epoch": 0.5230010567101092, "grad_norm": 1.105714144525463, "learning_rate": 9.73990327271804e-06, "loss": 0.8504, "step": 3712 }, { "epoch": 0.523141951391335, "grad_norm": 0.9011500401865635, "learning_rate": 9.735341228744783e-06, "loss": 0.8705, "step": 3713 }, { "epoch": 0.5232828460725608, "grad_norm": 1.3040864245333463, "learning_rate": 9.730779239890899e-06, "loss": 0.455, "step": 3714 }, { "epoch": 0.5234237407537865, "grad_norm": 0.9474888640951759, "learning_rate": 9.726217307106503e-06, "loss": 0.9246, "step": 3715 }, { "epoch": 0.5235646354350123, "grad_norm": 0.9464736112461534, "learning_rate": 9.721655431341685e-06, "loss": 0.887, "step": 3716 }, { "epoch": 0.5237055301162381, "grad_norm": 1.0853882655934843, "learning_rate": 9.71709361354653e-06, "loss": 0.3977, "step": 3717 }, { "epoch": 0.5238464247974639, "grad_norm": 0.9704595872645313, "learning_rate": 9.712531854671105e-06, "loss": 0.8443, "step": 3718 }, { "epoch": 0.5239873194786897, "grad_norm": 1.0529053386859304, "learning_rate": 9.707970155665471e-06, "loss": 0.8499, "step": 3719 }, { "epoch": 0.5241282141599155, "grad_norm": 0.9265549535162839, "learning_rate": 9.703408517479673e-06, "loss": 0.8335, "step": 3720 }, { "epoch": 0.5242691088411412, "grad_norm": 1.1329937015845906, "learning_rate": 9.69884694106375e-06, "loss": 0.8615, "step": 3721 }, { "epoch": 0.524410003522367, "grad_norm": 1.299581122141163, "learning_rate": 9.69428542736771e-06, "loss": 0.5071, "step": 3722 }, { "epoch": 0.5245508982035928, "grad_norm": 1.180960738554737, "learning_rate": 9.689723977341576e-06, "loss": 0.6271, "step": 3723 }, { "epoch": 0.5246917928848186, "grad_norm": 1.214904777883865, "learning_rate": 9.68516259193533e-06, "loss": 0.5234, "step": 3724 }, { "epoch": 0.5248326875660444, "grad_norm": 0.9506791249646154, "learning_rate": 9.68060127209896e-06, "loss": 0.837, "step": 3725 }, { "epoch": 0.5249735822472702, "grad_norm": 0.9794465191433044, "learning_rate": 9.676040018782427e-06, "loss": 0.8949, "step": 3726 }, { "epoch": 0.525114476928496, "grad_norm": 1.186609332107371, "learning_rate": 9.67147883293569e-06, "loss": 0.4691, "step": 3727 }, { "epoch": 0.5252553716097217, "grad_norm": 1.2864156436899565, "learning_rate": 9.66691771550868e-06, "loss": 0.8705, "step": 3728 }, { "epoch": 0.5253962662909475, "grad_norm": 1.1767411597962647, "learning_rate": 9.662356667451336e-06, "loss": 0.4961, "step": 3729 }, { "epoch": 0.5255371609721733, "grad_norm": 0.9718580202877476, "learning_rate": 9.657795689713552e-06, "loss": 0.8423, "step": 3730 }, { "epoch": 0.5256780556533991, "grad_norm": 0.9268744838963366, "learning_rate": 9.653234783245236e-06, "loss": 0.8904, "step": 3731 }, { "epoch": 0.5258189503346249, "grad_norm": 1.2665983662809852, "learning_rate": 9.648673948996262e-06, "loss": 0.5334, "step": 3732 }, { "epoch": 0.5259598450158507, "grad_norm": 1.0346590020466726, "learning_rate": 9.644113187916503e-06, "loss": 0.8471, "step": 3733 }, { "epoch": 0.5261007396970764, "grad_norm": 1.0805010845313974, "learning_rate": 9.639552500955799e-06, "loss": 0.958, "step": 3734 }, { "epoch": 0.5262416343783022, "grad_norm": 0.9740287228874898, "learning_rate": 9.634991889063996e-06, "loss": 0.8598, "step": 3735 }, { "epoch": 0.526382529059528, "grad_norm": 1.072991248988073, "learning_rate": 9.630431353190906e-06, "loss": 0.9055, "step": 3736 }, { "epoch": 0.5265234237407538, "grad_norm": 1.4161643754632902, "learning_rate": 9.625870894286339e-06, "loss": 0.5244, "step": 3737 }, { "epoch": 0.5266643184219796, "grad_norm": 1.188518688517241, "learning_rate": 9.621310513300073e-06, "loss": 0.9023, "step": 3738 }, { "epoch": 0.5268052131032054, "grad_norm": 1.264521995063254, "learning_rate": 9.616750211181894e-06, "loss": 0.8664, "step": 3739 }, { "epoch": 0.5269461077844312, "grad_norm": 1.038397791136857, "learning_rate": 9.612189988881548e-06, "loss": 0.9146, "step": 3740 }, { "epoch": 0.5270870024656569, "grad_norm": 1.0170229584892396, "learning_rate": 9.607629847348769e-06, "loss": 0.8356, "step": 3741 }, { "epoch": 0.5272278971468827, "grad_norm": 1.0190337518877042, "learning_rate": 9.60306978753329e-06, "loss": 0.8686, "step": 3742 }, { "epoch": 0.5273687918281085, "grad_norm": 0.9707612274568634, "learning_rate": 9.598509810384804e-06, "loss": 0.8805, "step": 3743 }, { "epoch": 0.5275096865093343, "grad_norm": 1.0745916564784128, "learning_rate": 9.59394991685301e-06, "loss": 0.8446, "step": 3744 }, { "epoch": 0.5276505811905601, "grad_norm": 1.040073402842234, "learning_rate": 9.589390107887568e-06, "loss": 0.8537, "step": 3745 }, { "epoch": 0.5277914758717859, "grad_norm": 1.0408858724124865, "learning_rate": 9.584830384438138e-06, "loss": 0.8619, "step": 3746 }, { "epoch": 0.5279323705530117, "grad_norm": 1.0488935316820995, "learning_rate": 9.580270747454348e-06, "loss": 0.8412, "step": 3747 }, { "epoch": 0.5280732652342374, "grad_norm": 0.9644845876194722, "learning_rate": 9.575711197885821e-06, "loss": 0.9235, "step": 3748 }, { "epoch": 0.5282141599154632, "grad_norm": 0.935587020320347, "learning_rate": 9.571151736682149e-06, "loss": 0.8429, "step": 3749 }, { "epoch": 0.528355054596689, "grad_norm": 1.2638049757914869, "learning_rate": 9.566592364792918e-06, "loss": 0.5967, "step": 3750 }, { "epoch": 0.5284959492779148, "grad_norm": 1.2157433172927194, "learning_rate": 9.562033083167681e-06, "loss": 0.9238, "step": 3751 }, { "epoch": 0.5286368439591406, "grad_norm": 0.9280563288239179, "learning_rate": 9.557473892755995e-06, "loss": 0.8803, "step": 3752 }, { "epoch": 0.5287777386403664, "grad_norm": 1.101351880455812, "learning_rate": 9.552914794507365e-06, "loss": 0.8303, "step": 3753 }, { "epoch": 0.5289186333215921, "grad_norm": 1.250233825055432, "learning_rate": 9.54835578937131e-06, "loss": 0.475, "step": 3754 }, { "epoch": 0.5290595280028179, "grad_norm": 1.2387251228620346, "learning_rate": 9.543796878297308e-06, "loss": 0.5076, "step": 3755 }, { "epoch": 0.5292004226840437, "grad_norm": 1.0827847244879543, "learning_rate": 9.53923806223483e-06, "loss": 0.8577, "step": 3756 }, { "epoch": 0.5293413173652695, "grad_norm": 1.0691370511894482, "learning_rate": 9.534679342133316e-06, "loss": 0.8719, "step": 3757 }, { "epoch": 0.5294822120464953, "grad_norm": 1.303544140553178, "learning_rate": 9.530120718942192e-06, "loss": 0.5652, "step": 3758 }, { "epoch": 0.5296231067277211, "grad_norm": 1.1677611507250378, "learning_rate": 9.525562193610863e-06, "loss": 0.4577, "step": 3759 }, { "epoch": 0.5297640014089469, "grad_norm": 0.9658775409648197, "learning_rate": 9.521003767088723e-06, "loss": 0.8715, "step": 3760 }, { "epoch": 0.5299048960901726, "grad_norm": 1.4449099101692096, "learning_rate": 9.516445440325121e-06, "loss": 0.5838, "step": 3761 }, { "epoch": 0.5300457907713984, "grad_norm": 0.9393821019531967, "learning_rate": 9.511887214269416e-06, "loss": 0.9275, "step": 3762 }, { "epoch": 0.5301866854526242, "grad_norm": 1.3304341977866565, "learning_rate": 9.50732908987092e-06, "loss": 0.6445, "step": 3763 }, { "epoch": 0.53032758013385, "grad_norm": 1.0996451393977782, "learning_rate": 9.50277106807894e-06, "loss": 0.8907, "step": 3764 }, { "epoch": 0.5304684748150758, "grad_norm": 0.9622345600576504, "learning_rate": 9.498213149842753e-06, "loss": 0.9146, "step": 3765 }, { "epoch": 0.5306093694963016, "grad_norm": 0.9903956645221601, "learning_rate": 9.493655336111622e-06, "loss": 0.8894, "step": 3766 }, { "epoch": 0.5307502641775274, "grad_norm": 1.3145751914912562, "learning_rate": 9.489097627834778e-06, "loss": 0.4926, "step": 3767 }, { "epoch": 0.530891158858753, "grad_norm": 1.2390565801542093, "learning_rate": 9.484540025961441e-06, "loss": 0.909, "step": 3768 }, { "epoch": 0.5310320535399788, "grad_norm": 1.2194389775571306, "learning_rate": 9.479982531440799e-06, "loss": 0.4542, "step": 3769 }, { "epoch": 0.5311729482212046, "grad_norm": 1.2668508237181866, "learning_rate": 9.47542514522203e-06, "loss": 0.4938, "step": 3770 }, { "epoch": 0.5313138429024304, "grad_norm": 1.3072993179607337, "learning_rate": 9.47086786825427e-06, "loss": 0.5487, "step": 3771 }, { "epoch": 0.5314547375836562, "grad_norm": 0.9582764699389179, "learning_rate": 9.466310701486654e-06, "loss": 0.901, "step": 3772 }, { "epoch": 0.531595632264882, "grad_norm": 1.2299909163957354, "learning_rate": 9.461753645868282e-06, "loss": 0.4829, "step": 3773 }, { "epoch": 0.5317365269461077, "grad_norm": 1.00471985711184, "learning_rate": 9.457196702348233e-06, "loss": 0.8015, "step": 3774 }, { "epoch": 0.5318774216273335, "grad_norm": 1.3134951606691614, "learning_rate": 9.452639871875558e-06, "loss": 0.5452, "step": 3775 }, { "epoch": 0.5320183163085593, "grad_norm": 1.0638213721218417, "learning_rate": 9.448083155399296e-06, "loss": 0.8009, "step": 3776 }, { "epoch": 0.5321592109897851, "grad_norm": 0.9694138699196456, "learning_rate": 9.443526553868449e-06, "loss": 0.8138, "step": 3777 }, { "epoch": 0.5323001056710109, "grad_norm": 1.0445368680688287, "learning_rate": 9.438970068232006e-06, "loss": 0.897, "step": 3778 }, { "epoch": 0.5324410003522367, "grad_norm": 1.061314394140805, "learning_rate": 9.434413699438923e-06, "loss": 0.8586, "step": 3779 }, { "epoch": 0.5325818950334625, "grad_norm": 1.0212576457725513, "learning_rate": 9.429857448438143e-06, "loss": 0.8989, "step": 3780 }, { "epoch": 0.5327227897146882, "grad_norm": 1.269073292103584, "learning_rate": 9.425301316178572e-06, "loss": 0.5245, "step": 3781 }, { "epoch": 0.532863684395914, "grad_norm": 0.9437973000773041, "learning_rate": 9.420745303609093e-06, "loss": 0.8562, "step": 3782 }, { "epoch": 0.5330045790771398, "grad_norm": 1.0066115979508616, "learning_rate": 9.416189411678576e-06, "loss": 0.8973, "step": 3783 }, { "epoch": 0.5331454737583656, "grad_norm": 1.2637548176659394, "learning_rate": 9.411633641335851e-06, "loss": 0.9289, "step": 3784 }, { "epoch": 0.5332863684395914, "grad_norm": 1.2335018966256364, "learning_rate": 9.407077993529735e-06, "loss": 0.5905, "step": 3785 }, { "epoch": 0.5334272631208172, "grad_norm": 1.0624127870952815, "learning_rate": 9.402522469209009e-06, "loss": 0.8647, "step": 3786 }, { "epoch": 0.5335681578020429, "grad_norm": 0.9706365401393613, "learning_rate": 9.397967069322437e-06, "loss": 0.9037, "step": 3787 }, { "epoch": 0.5337090524832687, "grad_norm": 1.3100542874549335, "learning_rate": 9.393411794818749e-06, "loss": 0.9028, "step": 3788 }, { "epoch": 0.5338499471644945, "grad_norm": 1.096178518144645, "learning_rate": 9.388856646646657e-06, "loss": 0.8517, "step": 3789 }, { "epoch": 0.5339908418457203, "grad_norm": 0.9922542643371127, "learning_rate": 9.384301625754836e-06, "loss": 0.8788, "step": 3790 }, { "epoch": 0.5341317365269461, "grad_norm": 1.0001474105949082, "learning_rate": 9.379746733091952e-06, "loss": 0.9106, "step": 3791 }, { "epoch": 0.5342726312081719, "grad_norm": 0.9566753856124134, "learning_rate": 9.375191969606623e-06, "loss": 0.7945, "step": 3792 }, { "epoch": 0.5344135258893977, "grad_norm": 0.9639993431235471, "learning_rate": 9.370637336247456e-06, "loss": 0.8626, "step": 3793 }, { "epoch": 0.5345544205706234, "grad_norm": 1.342483276332741, "learning_rate": 9.366082833963025e-06, "loss": 0.4674, "step": 3794 }, { "epoch": 0.5346953152518492, "grad_norm": 1.5634484991966653, "learning_rate": 9.361528463701877e-06, "loss": 0.6239, "step": 3795 }, { "epoch": 0.534836209933075, "grad_norm": 1.1872417388141072, "learning_rate": 9.35697422641253e-06, "loss": 0.8307, "step": 3796 }, { "epoch": 0.5349771046143008, "grad_norm": 1.3374268364303743, "learning_rate": 9.352420123043479e-06, "loss": 0.581, "step": 3797 }, { "epoch": 0.5351179992955266, "grad_norm": 1.0411391254576778, "learning_rate": 9.347866154543187e-06, "loss": 0.9233, "step": 3798 }, { "epoch": 0.5352588939767524, "grad_norm": 0.9659922273999626, "learning_rate": 9.343312321860088e-06, "loss": 0.8635, "step": 3799 }, { "epoch": 0.5353997886579782, "grad_norm": 1.0549087622710156, "learning_rate": 9.338758625942587e-06, "loss": 0.8236, "step": 3800 }, { "epoch": 0.5355406833392039, "grad_norm": 0.9886398961658684, "learning_rate": 9.334205067739076e-06, "loss": 0.9133, "step": 3801 }, { "epoch": 0.5356815780204297, "grad_norm": 0.9474013514947356, "learning_rate": 9.32965164819789e-06, "loss": 0.8678, "step": 3802 }, { "epoch": 0.5358224727016555, "grad_norm": 0.9207268659583406, "learning_rate": 9.325098368267362e-06, "loss": 0.8324, "step": 3803 }, { "epoch": 0.5359633673828813, "grad_norm": 0.9137316904370224, "learning_rate": 9.320545228895776e-06, "loss": 0.8252, "step": 3804 }, { "epoch": 0.5361042620641071, "grad_norm": 0.9874965883501845, "learning_rate": 9.315992231031403e-06, "loss": 0.846, "step": 3805 }, { "epoch": 0.5362451567453329, "grad_norm": 1.4658307618035555, "learning_rate": 9.311439375622472e-06, "loss": 0.5276, "step": 3806 }, { "epoch": 0.5363860514265586, "grad_norm": 1.2801591859545831, "learning_rate": 9.30688666361719e-06, "loss": 0.5261, "step": 3807 }, { "epoch": 0.5365269461077844, "grad_norm": 1.1590209048674474, "learning_rate": 9.302334095963729e-06, "loss": 0.4433, "step": 3808 }, { "epoch": 0.5366678407890102, "grad_norm": 1.2970959934514135, "learning_rate": 9.297781673610235e-06, "loss": 0.6529, "step": 3809 }, { "epoch": 0.536808735470236, "grad_norm": 1.277614368231562, "learning_rate": 9.293229397504819e-06, "loss": 0.6218, "step": 3810 }, { "epoch": 0.5369496301514618, "grad_norm": 1.0076942334110697, "learning_rate": 9.288677268595569e-06, "loss": 0.8508, "step": 3811 }, { "epoch": 0.5370905248326876, "grad_norm": 0.9292227869014531, "learning_rate": 9.284125287830535e-06, "loss": 0.803, "step": 3812 }, { "epoch": 0.5372314195139134, "grad_norm": 1.2393695015952126, "learning_rate": 9.279573456157742e-06, "loss": 0.5928, "step": 3813 }, { "epoch": 0.5373723141951391, "grad_norm": 0.9973705009701296, "learning_rate": 9.275021774525175e-06, "loss": 0.9127, "step": 3814 }, { "epoch": 0.5375132088763649, "grad_norm": 1.2239536020064627, "learning_rate": 9.270470243880801e-06, "loss": 0.4553, "step": 3815 }, { "epoch": 0.5376541035575907, "grad_norm": 1.3896171978278316, "learning_rate": 9.26591886517254e-06, "loss": 0.6278, "step": 3816 }, { "epoch": 0.5377949982388165, "grad_norm": 0.9657371609157045, "learning_rate": 9.261367639348297e-06, "loss": 0.8423, "step": 3817 }, { "epoch": 0.5379358929200423, "grad_norm": 1.2238772308267007, "learning_rate": 9.25681656735593e-06, "loss": 0.5882, "step": 3818 }, { "epoch": 0.5380767876012681, "grad_norm": 1.0420080056312817, "learning_rate": 9.252265650143282e-06, "loss": 0.8868, "step": 3819 }, { "epoch": 0.5382176822824938, "grad_norm": 1.2902477127198462, "learning_rate": 9.247714888658138e-06, "loss": 0.5374, "step": 3820 }, { "epoch": 0.5383585769637196, "grad_norm": 1.2240004868412562, "learning_rate": 9.24316428384828e-06, "loss": 0.577, "step": 3821 }, { "epoch": 0.5384994716449454, "grad_norm": 1.0624687264194626, "learning_rate": 9.238613836661436e-06, "loss": 0.8615, "step": 3822 }, { "epoch": 0.5386403663261712, "grad_norm": 0.9852348372629317, "learning_rate": 9.234063548045312e-06, "loss": 0.8302, "step": 3823 }, { "epoch": 0.538781261007397, "grad_norm": 0.9299544367498235, "learning_rate": 9.229513418947577e-06, "loss": 0.8974, "step": 3824 }, { "epoch": 0.5389221556886228, "grad_norm": 0.9488905914665792, "learning_rate": 9.224963450315865e-06, "loss": 0.9239, "step": 3825 }, { "epoch": 0.5390630503698486, "grad_norm": 1.3456001989509665, "learning_rate": 9.220413643097783e-06, "loss": 0.7336, "step": 3826 }, { "epoch": 0.5392039450510743, "grad_norm": 0.9913037861320242, "learning_rate": 9.215863998240895e-06, "loss": 0.9168, "step": 3827 }, { "epoch": 0.5393448397323001, "grad_norm": 1.0418140688252604, "learning_rate": 9.211314516692742e-06, "loss": 0.918, "step": 3828 }, { "epoch": 0.5394857344135259, "grad_norm": 0.8886102633880189, "learning_rate": 9.206765199400821e-06, "loss": 0.851, "step": 3829 }, { "epoch": 0.5396266290947517, "grad_norm": 1.0193027686473606, "learning_rate": 9.202216047312604e-06, "loss": 0.9064, "step": 3830 }, { "epoch": 0.5397675237759775, "grad_norm": 1.462040319120048, "learning_rate": 9.197667061375518e-06, "loss": 0.6228, "step": 3831 }, { "epoch": 0.5399084184572033, "grad_norm": 1.0234110837547734, "learning_rate": 9.193118242536969e-06, "loss": 0.8675, "step": 3832 }, { "epoch": 0.5400493131384291, "grad_norm": 1.1467068004277556, "learning_rate": 9.18856959174431e-06, "loss": 0.8437, "step": 3833 }, { "epoch": 0.5401902078196548, "grad_norm": 0.9784031508419028, "learning_rate": 9.184021109944881e-06, "loss": 0.8178, "step": 3834 }, { "epoch": 0.5403311025008806, "grad_norm": 1.3135158408011862, "learning_rate": 9.179472798085969e-06, "loss": 0.5702, "step": 3835 }, { "epoch": 0.5404719971821064, "grad_norm": 1.2537474195221887, "learning_rate": 9.174924657114835e-06, "loss": 0.8164, "step": 3836 }, { "epoch": 0.5406128918633322, "grad_norm": 1.3056482813392105, "learning_rate": 9.170376687978695e-06, "loss": 0.5677, "step": 3837 }, { "epoch": 0.540753786544558, "grad_norm": 0.9722623065980527, "learning_rate": 9.165828891624744e-06, "loss": 0.8476, "step": 3838 }, { "epoch": 0.5408946812257838, "grad_norm": 1.037634402487089, "learning_rate": 9.161281269000125e-06, "loss": 0.8553, "step": 3839 }, { "epoch": 0.5410355759070095, "grad_norm": 1.201426499994183, "learning_rate": 9.15673382105196e-06, "loss": 0.4878, "step": 3840 }, { "epoch": 0.5411764705882353, "grad_norm": 0.9775915749797857, "learning_rate": 9.152186548727317e-06, "loss": 0.8767, "step": 3841 }, { "epoch": 0.5413173652694611, "grad_norm": 1.2273763534988025, "learning_rate": 9.147639452973247e-06, "loss": 0.6824, "step": 3842 }, { "epoch": 0.5414582599506869, "grad_norm": 0.9969310962778185, "learning_rate": 9.14309253473675e-06, "loss": 0.7774, "step": 3843 }, { "epoch": 0.5415991546319127, "grad_norm": 1.0365270370538127, "learning_rate": 9.138545794964797e-06, "loss": 0.8771, "step": 3844 }, { "epoch": 0.5417400493131385, "grad_norm": 1.11485265364817, "learning_rate": 9.133999234604313e-06, "loss": 0.4539, "step": 3845 }, { "epoch": 0.5418809439943643, "grad_norm": 1.2843913241554352, "learning_rate": 9.129452854602196e-06, "loss": 0.5814, "step": 3846 }, { "epoch": 0.54202183867559, "grad_norm": 1.03086280260626, "learning_rate": 9.1249066559053e-06, "loss": 0.8862, "step": 3847 }, { "epoch": 0.5421627333568158, "grad_norm": 0.9425060640271024, "learning_rate": 9.120360639460445e-06, "loss": 0.8576, "step": 3848 }, { "epoch": 0.5423036280380416, "grad_norm": 1.186603227219929, "learning_rate": 9.1158148062144e-06, "loss": 0.8467, "step": 3849 }, { "epoch": 0.5424445227192674, "grad_norm": 1.0423287424871406, "learning_rate": 9.111269157113926e-06, "loss": 0.9035, "step": 3850 }, { "epoch": 0.5425854174004932, "grad_norm": 1.0085694077866234, "learning_rate": 9.106723693105709e-06, "loss": 0.8368, "step": 3851 }, { "epoch": 0.542726312081719, "grad_norm": 1.3317007310884021, "learning_rate": 9.102178415136423e-06, "loss": 0.4784, "step": 3852 }, { "epoch": 0.5428672067629448, "grad_norm": 0.9503392212903747, "learning_rate": 9.09763332415269e-06, "loss": 0.8633, "step": 3853 }, { "epoch": 0.5430081014441704, "grad_norm": 0.9998786301494758, "learning_rate": 9.0930884211011e-06, "loss": 0.878, "step": 3854 }, { "epoch": 0.5431489961253962, "grad_norm": 0.8822703054891485, "learning_rate": 9.0885437069282e-06, "loss": 0.8616, "step": 3855 }, { "epoch": 0.543289890806622, "grad_norm": 1.0335616001318357, "learning_rate": 9.083999182580498e-06, "loss": 0.8482, "step": 3856 }, { "epoch": 0.5434307854878478, "grad_norm": 1.233462563804016, "learning_rate": 9.079454849004463e-06, "loss": 0.9159, "step": 3857 }, { "epoch": 0.5435716801690736, "grad_norm": 1.0397420852002524, "learning_rate": 9.074910707146528e-06, "loss": 0.8759, "step": 3858 }, { "epoch": 0.5437125748502994, "grad_norm": 1.0633929351503584, "learning_rate": 9.070366757953071e-06, "loss": 0.8074, "step": 3859 }, { "epoch": 0.5438534695315251, "grad_norm": 1.0075651683279698, "learning_rate": 9.065823002370459e-06, "loss": 0.8726, "step": 3860 }, { "epoch": 0.5439943642127509, "grad_norm": 1.0791977817136287, "learning_rate": 9.061279441344986e-06, "loss": 0.8971, "step": 3861 }, { "epoch": 0.5441352588939767, "grad_norm": 1.1911479832734488, "learning_rate": 9.056736075822928e-06, "loss": 0.4222, "step": 3862 }, { "epoch": 0.5442761535752025, "grad_norm": 0.9779766870994598, "learning_rate": 9.05219290675051e-06, "loss": 0.871, "step": 3863 }, { "epoch": 0.5444170482564283, "grad_norm": 0.9194680177562888, "learning_rate": 9.047649935073923e-06, "loss": 0.8184, "step": 3864 }, { "epoch": 0.5445579429376541, "grad_norm": 0.8861407904863419, "learning_rate": 9.043107161739309e-06, "loss": 0.8543, "step": 3865 }, { "epoch": 0.5446988376188799, "grad_norm": 0.9586546627529019, "learning_rate": 9.03856458769277e-06, "loss": 0.903, "step": 3866 }, { "epoch": 0.5448397323001056, "grad_norm": 0.9878222435080464, "learning_rate": 9.034022213880375e-06, "loss": 0.872, "step": 3867 }, { "epoch": 0.5449806269813314, "grad_norm": 1.046136965488693, "learning_rate": 9.029480041248139e-06, "loss": 0.792, "step": 3868 }, { "epoch": 0.5451215216625572, "grad_norm": 1.0847634533880306, "learning_rate": 9.024938070742048e-06, "loss": 0.9009, "step": 3869 }, { "epoch": 0.545262416343783, "grad_norm": 0.9542917121746195, "learning_rate": 9.02039630330803e-06, "loss": 0.8015, "step": 3870 }, { "epoch": 0.5454033110250088, "grad_norm": 0.9296172682159001, "learning_rate": 9.015854739891992e-06, "loss": 0.8631, "step": 3871 }, { "epoch": 0.5455442057062346, "grad_norm": 1.2411202289617307, "learning_rate": 9.011313381439775e-06, "loss": 0.4356, "step": 3872 }, { "epoch": 0.5456851003874603, "grad_norm": 1.0346433581426737, "learning_rate": 9.006772228897198e-06, "loss": 0.8471, "step": 3873 }, { "epoch": 0.5458259950686861, "grad_norm": 0.9572413962354888, "learning_rate": 9.002231283210021e-06, "loss": 0.8597, "step": 3874 }, { "epoch": 0.5459668897499119, "grad_norm": 1.412609917140115, "learning_rate": 8.997690545323973e-06, "loss": 0.6163, "step": 3875 }, { "epoch": 0.5461077844311377, "grad_norm": 1.1713775634267711, "learning_rate": 8.993150016184732e-06, "loss": 0.4918, "step": 3876 }, { "epoch": 0.5462486791123635, "grad_norm": 1.059104943286043, "learning_rate": 8.988609696737935e-06, "loss": 0.9481, "step": 3877 }, { "epoch": 0.5463895737935893, "grad_norm": 0.9711230002600111, "learning_rate": 8.984069587929173e-06, "loss": 0.8876, "step": 3878 }, { "epoch": 0.5465304684748151, "grad_norm": 1.3278288105569487, "learning_rate": 8.979529690704003e-06, "loss": 0.5043, "step": 3879 }, { "epoch": 0.5466713631560408, "grad_norm": 0.8868299567015164, "learning_rate": 8.97499000600792e-06, "loss": 0.846, "step": 3880 }, { "epoch": 0.5468122578372666, "grad_norm": 0.9467649123783194, "learning_rate": 8.970450534786398e-06, "loss": 0.8592, "step": 3881 }, { "epoch": 0.5469531525184924, "grad_norm": 1.2648848028024655, "learning_rate": 8.965911277984838e-06, "loss": 0.4593, "step": 3882 }, { "epoch": 0.5470940471997182, "grad_norm": 0.8596126170395043, "learning_rate": 8.961372236548625e-06, "loss": 0.8086, "step": 3883 }, { "epoch": 0.547234941880944, "grad_norm": 0.9569027344255223, "learning_rate": 8.95683341142308e-06, "loss": 0.9052, "step": 3884 }, { "epoch": 0.5473758365621698, "grad_norm": 1.277057889811126, "learning_rate": 8.95229480355349e-06, "loss": 0.5014, "step": 3885 }, { "epoch": 0.5475167312433956, "grad_norm": 0.9546316339032895, "learning_rate": 8.947756413885086e-06, "loss": 0.8841, "step": 3886 }, { "epoch": 0.5476576259246213, "grad_norm": 0.9528915104204273, "learning_rate": 8.943218243363066e-06, "loss": 0.8467, "step": 3887 }, { "epoch": 0.5477985206058471, "grad_norm": 1.022769187033375, "learning_rate": 8.938680292932569e-06, "loss": 0.8969, "step": 3888 }, { "epoch": 0.5479394152870729, "grad_norm": 0.9483065340268877, "learning_rate": 8.934142563538699e-06, "loss": 0.9045, "step": 3889 }, { "epoch": 0.5480803099682987, "grad_norm": 1.1373966547570677, "learning_rate": 8.929605056126506e-06, "loss": 0.5059, "step": 3890 }, { "epoch": 0.5482212046495245, "grad_norm": 0.9152918922582957, "learning_rate": 8.925067771641006e-06, "loss": 0.8617, "step": 3891 }, { "epoch": 0.5483620993307503, "grad_norm": 1.364053239538126, "learning_rate": 8.92053071102715e-06, "loss": 0.5243, "step": 3892 }, { "epoch": 0.548502994011976, "grad_norm": 0.9719266252411716, "learning_rate": 8.915993875229862e-06, "loss": 0.8466, "step": 3893 }, { "epoch": 0.5486438886932018, "grad_norm": 1.002363078345581, "learning_rate": 8.911457265194003e-06, "loss": 0.8781, "step": 3894 }, { "epoch": 0.5487847833744276, "grad_norm": 1.29676942133024, "learning_rate": 8.9069208818644e-06, "loss": 0.5711, "step": 3895 }, { "epoch": 0.5489256780556534, "grad_norm": 1.274426883446475, "learning_rate": 8.90238472618582e-06, "loss": 0.493, "step": 3896 }, { "epoch": 0.5490665727368792, "grad_norm": 1.0014871015161575, "learning_rate": 8.897848799102994e-06, "loss": 0.8568, "step": 3897 }, { "epoch": 0.549207467418105, "grad_norm": 1.0808018715465235, "learning_rate": 8.893313101560596e-06, "loss": 0.919, "step": 3898 }, { "epoch": 0.5493483620993308, "grad_norm": 0.9144348211177041, "learning_rate": 8.888777634503266e-06, "loss": 0.8428, "step": 3899 }, { "epoch": 0.5494892567805565, "grad_norm": 1.003258391631368, "learning_rate": 8.884242398875574e-06, "loss": 0.8694, "step": 3900 }, { "epoch": 0.5496301514617823, "grad_norm": 1.3844724399267012, "learning_rate": 8.879707395622066e-06, "loss": 0.4904, "step": 3901 }, { "epoch": 0.5497710461430081, "grad_norm": 1.051577301079878, "learning_rate": 8.87517262568722e-06, "loss": 0.8363, "step": 3902 }, { "epoch": 0.5499119408242339, "grad_norm": 0.9508564984704002, "learning_rate": 8.87063809001548e-06, "loss": 0.905, "step": 3903 }, { "epoch": 0.5500528355054597, "grad_norm": 0.8989427634475994, "learning_rate": 8.86610378955123e-06, "loss": 0.8481, "step": 3904 }, { "epoch": 0.5501937301866855, "grad_norm": 1.0407435714769588, "learning_rate": 8.861569725238812e-06, "loss": 0.8249, "step": 3905 }, { "epoch": 0.5503346248679112, "grad_norm": 1.0194255418015896, "learning_rate": 8.857035898022515e-06, "loss": 0.9291, "step": 3906 }, { "epoch": 0.550475519549137, "grad_norm": 1.085557068419563, "learning_rate": 8.852502308846584e-06, "loss": 0.8279, "step": 3907 }, { "epoch": 0.5506164142303628, "grad_norm": 0.9922801857780011, "learning_rate": 8.847968958655208e-06, "loss": 0.8625, "step": 3908 }, { "epoch": 0.5507573089115886, "grad_norm": 1.075385062680233, "learning_rate": 8.843435848392528e-06, "loss": 0.8849, "step": 3909 }, { "epoch": 0.5508982035928144, "grad_norm": 0.9036340923355604, "learning_rate": 8.838902979002639e-06, "loss": 0.7945, "step": 3910 }, { "epoch": 0.5510390982740402, "grad_norm": 1.4420621085132843, "learning_rate": 8.834370351429575e-06, "loss": 0.4868, "step": 3911 }, { "epoch": 0.551179992955266, "grad_norm": 1.3337964136261362, "learning_rate": 8.829837966617341e-06, "loss": 0.4283, "step": 3912 }, { "epoch": 0.5513208876364917, "grad_norm": 1.0823836487779337, "learning_rate": 8.825305825509864e-06, "loss": 0.9284, "step": 3913 }, { "epoch": 0.5514617823177175, "grad_norm": 0.9458117767337131, "learning_rate": 8.820773929051046e-06, "loss": 0.8508, "step": 3914 }, { "epoch": 0.5516026769989433, "grad_norm": 0.9839527743270421, "learning_rate": 8.816242278184716e-06, "loss": 0.9091, "step": 3915 }, { "epoch": 0.5517435716801691, "grad_norm": 1.114756118877342, "learning_rate": 8.81171087385467e-06, "loss": 0.8745, "step": 3916 }, { "epoch": 0.5518844663613949, "grad_norm": 0.9758218322769349, "learning_rate": 8.807179717004639e-06, "loss": 0.8416, "step": 3917 }, { "epoch": 0.5520253610426207, "grad_norm": 1.242890040755083, "learning_rate": 8.802648808578311e-06, "loss": 0.5419, "step": 3918 }, { "epoch": 0.5521662557238465, "grad_norm": 0.9840569500536532, "learning_rate": 8.798118149519319e-06, "loss": 0.868, "step": 3919 }, { "epoch": 0.5523071504050722, "grad_norm": 0.9663986149052439, "learning_rate": 8.793587740771244e-06, "loss": 0.8289, "step": 3920 }, { "epoch": 0.552448045086298, "grad_norm": 0.9798046779325931, "learning_rate": 8.789057583277612e-06, "loss": 0.7988, "step": 3921 }, { "epoch": 0.5525889397675238, "grad_norm": 0.9953715245869584, "learning_rate": 8.784527677981908e-06, "loss": 0.881, "step": 3922 }, { "epoch": 0.5527298344487496, "grad_norm": 1.0024441030723292, "learning_rate": 8.77999802582755e-06, "loss": 0.8734, "step": 3923 }, { "epoch": 0.5528707291299754, "grad_norm": 1.0028438639318384, "learning_rate": 8.775468627757913e-06, "loss": 0.8445, "step": 3924 }, { "epoch": 0.5530116238112012, "grad_norm": 0.9991306924723985, "learning_rate": 8.770939484716313e-06, "loss": 0.8788, "step": 3925 }, { "epoch": 0.5531525184924269, "grad_norm": 1.0448559239614197, "learning_rate": 8.76641059764602e-06, "loss": 0.8622, "step": 3926 }, { "epoch": 0.5532934131736527, "grad_norm": 1.109825099001729, "learning_rate": 8.76188196749024e-06, "loss": 0.8457, "step": 3927 }, { "epoch": 0.5534343078548785, "grad_norm": 1.2466690748190092, "learning_rate": 8.757353595192137e-06, "loss": 0.4814, "step": 3928 }, { "epoch": 0.5535752025361043, "grad_norm": 1.5319035842848157, "learning_rate": 8.752825481694813e-06, "loss": 0.4966, "step": 3929 }, { "epoch": 0.5537160972173301, "grad_norm": 1.038593506378994, "learning_rate": 8.748297627941327e-06, "loss": 0.9075, "step": 3930 }, { "epoch": 0.5538569918985559, "grad_norm": 1.0108757032188682, "learning_rate": 8.743770034874663e-06, "loss": 0.8904, "step": 3931 }, { "epoch": 0.5539978865797817, "grad_norm": 1.3335070770182864, "learning_rate": 8.739242703437775e-06, "loss": 0.5965, "step": 3932 }, { "epoch": 0.5541387812610074, "grad_norm": 1.6413146287434521, "learning_rate": 8.734715634573547e-06, "loss": 0.5248, "step": 3933 }, { "epoch": 0.5542796759422332, "grad_norm": 1.3279397943265983, "learning_rate": 8.730188829224815e-06, "loss": 0.4867, "step": 3934 }, { "epoch": 0.554420570623459, "grad_norm": 1.01216698550208, "learning_rate": 8.725662288334353e-06, "loss": 0.9734, "step": 3935 }, { "epoch": 0.5545614653046848, "grad_norm": 1.2146091939936026, "learning_rate": 8.721136012844893e-06, "loss": 0.9348, "step": 3936 }, { "epoch": 0.5547023599859106, "grad_norm": 0.949094670305316, "learning_rate": 8.716610003699092e-06, "loss": 0.8769, "step": 3937 }, { "epoch": 0.5548432546671364, "grad_norm": 1.2829746886969444, "learning_rate": 8.712084261839575e-06, "loss": 0.8435, "step": 3938 }, { "epoch": 0.5549841493483622, "grad_norm": 0.9713005082673618, "learning_rate": 8.707558788208889e-06, "loss": 0.8477, "step": 3939 }, { "epoch": 0.5551250440295878, "grad_norm": 1.3601176081173212, "learning_rate": 8.703033583749545e-06, "loss": 0.5758, "step": 3940 }, { "epoch": 0.5552659387108136, "grad_norm": 1.2010123106047497, "learning_rate": 8.698508649403979e-06, "loss": 0.3604, "step": 3941 }, { "epoch": 0.5554068333920394, "grad_norm": 1.3254940920827865, "learning_rate": 8.69398398611459e-06, "loss": 0.5192, "step": 3942 }, { "epoch": 0.5555477280732652, "grad_norm": 1.4662692659726053, "learning_rate": 8.689459594823703e-06, "loss": 0.6301, "step": 3943 }, { "epoch": 0.555688622754491, "grad_norm": 1.103137945432259, "learning_rate": 8.6849354764736e-06, "loss": 0.4362, "step": 3944 }, { "epoch": 0.5558295174357168, "grad_norm": 1.019985176153025, "learning_rate": 8.680411632006492e-06, "loss": 0.8579, "step": 3945 }, { "epoch": 0.5559704121169425, "grad_norm": 1.0238116061935993, "learning_rate": 8.67588806236455e-06, "loss": 0.8345, "step": 3946 }, { "epoch": 0.5561113067981683, "grad_norm": 1.084749075994692, "learning_rate": 8.671364768489875e-06, "loss": 0.8771, "step": 3947 }, { "epoch": 0.5562522014793941, "grad_norm": 1.2036252706865058, "learning_rate": 8.666841751324515e-06, "loss": 0.5826, "step": 3948 }, { "epoch": 0.5563930961606199, "grad_norm": 1.6751914573601951, "learning_rate": 8.66231901181046e-06, "loss": 0.5008, "step": 3949 }, { "epoch": 0.5565339908418457, "grad_norm": 1.0924165673487214, "learning_rate": 8.65779655088964e-06, "loss": 0.9203, "step": 3950 }, { "epoch": 0.5566748855230715, "grad_norm": 1.022915640934107, "learning_rate": 8.653274369503933e-06, "loss": 0.8668, "step": 3951 }, { "epoch": 0.5568157802042973, "grad_norm": 0.9486402072640769, "learning_rate": 8.64875246859515e-06, "loss": 0.8073, "step": 3952 }, { "epoch": 0.556956674885523, "grad_norm": 0.960008830765521, "learning_rate": 8.644230849105054e-06, "loss": 0.8817, "step": 3953 }, { "epoch": 0.5570975695667488, "grad_norm": 0.9284189380267825, "learning_rate": 8.639709511975339e-06, "loss": 0.8435, "step": 3954 }, { "epoch": 0.5572384642479746, "grad_norm": 1.1008426563562326, "learning_rate": 8.635188458147648e-06, "loss": 0.4492, "step": 3955 }, { "epoch": 0.5573793589292004, "grad_norm": 1.1032450511511127, "learning_rate": 8.63066768856356e-06, "loss": 0.8045, "step": 3956 }, { "epoch": 0.5575202536104262, "grad_norm": 0.9690201127086564, "learning_rate": 8.626147204164599e-06, "loss": 0.8876, "step": 3957 }, { "epoch": 0.557661148291652, "grad_norm": 1.088746187950155, "learning_rate": 8.621627005892227e-06, "loss": 0.9073, "step": 3958 }, { "epoch": 0.5578020429728777, "grad_norm": 1.0022446675054473, "learning_rate": 8.617107094687845e-06, "loss": 0.8679, "step": 3959 }, { "epoch": 0.5579429376541035, "grad_norm": 0.9989669333431506, "learning_rate": 8.612587471492793e-06, "loss": 0.8017, "step": 3960 }, { "epoch": 0.5580838323353293, "grad_norm": 0.92219084891457, "learning_rate": 8.608068137248366e-06, "loss": 0.8303, "step": 3961 }, { "epoch": 0.5582247270165551, "grad_norm": 0.965678123665396, "learning_rate": 8.603549092895773e-06, "loss": 0.8322, "step": 3962 }, { "epoch": 0.5583656216977809, "grad_norm": 0.913670194536137, "learning_rate": 8.599030339376187e-06, "loss": 0.822, "step": 3963 }, { "epoch": 0.5585065163790067, "grad_norm": 1.2427684467591615, "learning_rate": 8.594511877630702e-06, "loss": 0.4614, "step": 3964 }, { "epoch": 0.5586474110602325, "grad_norm": 1.3617077846378698, "learning_rate": 8.589993708600365e-06, "loss": 0.6542, "step": 3965 }, { "epoch": 0.5587883057414582, "grad_norm": 1.0553217028728943, "learning_rate": 8.585475833226152e-06, "loss": 0.8883, "step": 3966 }, { "epoch": 0.558929200422684, "grad_norm": 1.0277343937828671, "learning_rate": 8.580958252448986e-06, "loss": 0.811, "step": 3967 }, { "epoch": 0.5590700951039098, "grad_norm": 1.3433458962383775, "learning_rate": 8.576440967209722e-06, "loss": 0.5999, "step": 3968 }, { "epoch": 0.5592109897851356, "grad_norm": 1.4919724193862687, "learning_rate": 8.571923978449157e-06, "loss": 0.5163, "step": 3969 }, { "epoch": 0.5593518844663614, "grad_norm": 0.9575891118206021, "learning_rate": 8.567407287108022e-06, "loss": 0.8324, "step": 3970 }, { "epoch": 0.5594927791475872, "grad_norm": 1.0402219648796798, "learning_rate": 8.562890894126998e-06, "loss": 0.8994, "step": 3971 }, { "epoch": 0.559633673828813, "grad_norm": 1.0065077847250417, "learning_rate": 8.558374800446686e-06, "loss": 0.8812, "step": 3972 }, { "epoch": 0.5597745685100387, "grad_norm": 1.1113770808422856, "learning_rate": 8.553859007007638e-06, "loss": 0.9425, "step": 3973 }, { "epoch": 0.5599154631912645, "grad_norm": 1.0235335862435555, "learning_rate": 8.54934351475034e-06, "loss": 0.9227, "step": 3974 }, { "epoch": 0.5600563578724903, "grad_norm": 1.383117476251119, "learning_rate": 8.544828324615216e-06, "loss": 0.516, "step": 3975 }, { "epoch": 0.5601972525537161, "grad_norm": 1.3080367877111023, "learning_rate": 8.540313437542622e-06, "loss": 0.4958, "step": 3976 }, { "epoch": 0.5603381472349419, "grad_norm": 0.8742984979991459, "learning_rate": 8.53579885447286e-06, "loss": 0.7958, "step": 3977 }, { "epoch": 0.5604790419161677, "grad_norm": 1.2907544690813966, "learning_rate": 8.531284576346154e-06, "loss": 0.5179, "step": 3978 }, { "epoch": 0.5606199365973934, "grad_norm": 1.2849912130135646, "learning_rate": 8.526770604102686e-06, "loss": 0.575, "step": 3979 }, { "epoch": 0.5607608312786192, "grad_norm": 1.0012825819693403, "learning_rate": 8.52225693868255e-06, "loss": 0.854, "step": 3980 }, { "epoch": 0.560901725959845, "grad_norm": 1.0381736829306043, "learning_rate": 8.517743581025798e-06, "loss": 0.869, "step": 3981 }, { "epoch": 0.5610426206410708, "grad_norm": 1.2853300958597371, "learning_rate": 8.513230532072405e-06, "loss": 0.5169, "step": 3982 }, { "epoch": 0.5611835153222966, "grad_norm": 0.9878809721196675, "learning_rate": 8.508717792762283e-06, "loss": 0.9021, "step": 3983 }, { "epoch": 0.5613244100035224, "grad_norm": 1.3574175037350946, "learning_rate": 8.504205364035283e-06, "loss": 0.4989, "step": 3984 }, { "epoch": 0.5614653046847482, "grad_norm": 0.9199108239613591, "learning_rate": 8.49969324683119e-06, "loss": 0.8544, "step": 3985 }, { "epoch": 0.5616061993659739, "grad_norm": 1.0440795470233843, "learning_rate": 8.495181442089722e-06, "loss": 0.8989, "step": 3986 }, { "epoch": 0.5617470940471997, "grad_norm": 0.9011069436919538, "learning_rate": 8.490669950750536e-06, "loss": 0.8501, "step": 3987 }, { "epoch": 0.5618879887284255, "grad_norm": 0.9511198342250238, "learning_rate": 8.486158773753217e-06, "loss": 0.8599, "step": 3988 }, { "epoch": 0.5620288834096513, "grad_norm": 1.0418913755596106, "learning_rate": 8.4816479120373e-06, "loss": 0.8943, "step": 3989 }, { "epoch": 0.5621697780908771, "grad_norm": 0.9264392221044895, "learning_rate": 8.477137366542227e-06, "loss": 0.7683, "step": 3990 }, { "epoch": 0.5623106727721029, "grad_norm": 1.022322587384411, "learning_rate": 8.472627138207406e-06, "loss": 0.8517, "step": 3991 }, { "epoch": 0.5624515674533286, "grad_norm": 0.9888682667372777, "learning_rate": 8.468117227972156e-06, "loss": 0.829, "step": 3992 }, { "epoch": 0.5625924621345544, "grad_norm": 1.0047620087966753, "learning_rate": 8.463607636775735e-06, "loss": 0.8294, "step": 3993 }, { "epoch": 0.5627333568157802, "grad_norm": 1.3669909439763577, "learning_rate": 8.45909836555734e-06, "loss": 0.5261, "step": 3994 }, { "epoch": 0.562874251497006, "grad_norm": 1.1001363145553062, "learning_rate": 8.454589415256098e-06, "loss": 0.8905, "step": 3995 }, { "epoch": 0.5630151461782318, "grad_norm": 0.968668752270354, "learning_rate": 8.45008078681107e-06, "loss": 0.8425, "step": 3996 }, { "epoch": 0.5631560408594576, "grad_norm": 1.2739096144081863, "learning_rate": 8.445572481161248e-06, "loss": 0.3951, "step": 3997 }, { "epoch": 0.5632969355406834, "grad_norm": 1.228263466788934, "learning_rate": 8.441064499245559e-06, "loss": 0.5465, "step": 3998 }, { "epoch": 0.5634378302219091, "grad_norm": 1.0856644184869821, "learning_rate": 8.43655684200286e-06, "loss": 0.8732, "step": 3999 }, { "epoch": 0.5635787249031349, "grad_norm": 1.1512356718004084, "learning_rate": 8.432049510371944e-06, "loss": 0.4529, "step": 4000 }, { "epoch": 0.5637196195843607, "grad_norm": 0.9756241150028238, "learning_rate": 8.427542505291526e-06, "loss": 0.8756, "step": 4001 }, { "epoch": 0.5638605142655865, "grad_norm": 1.284999622512685, "learning_rate": 8.423035827700279e-06, "loss": 0.4642, "step": 4002 }, { "epoch": 0.5640014089468123, "grad_norm": 1.0827066356649422, "learning_rate": 8.41852947853677e-06, "loss": 0.8979, "step": 4003 }, { "epoch": 0.5641423036280381, "grad_norm": 1.2004858370813694, "learning_rate": 8.414023458739531e-06, "loss": 0.474, "step": 4004 }, { "epoch": 0.5642831983092639, "grad_norm": 1.110755265728714, "learning_rate": 8.409517769247006e-06, "loss": 0.8668, "step": 4005 }, { "epoch": 0.5644240929904896, "grad_norm": 0.9200046234026443, "learning_rate": 8.40501241099758e-06, "loss": 0.8888, "step": 4006 }, { "epoch": 0.5645649876717154, "grad_norm": 0.9561217075789354, "learning_rate": 8.400507384929558e-06, "loss": 0.8513, "step": 4007 }, { "epoch": 0.5647058823529412, "grad_norm": 1.061392169791022, "learning_rate": 8.396002691981194e-06, "loss": 0.8112, "step": 4008 }, { "epoch": 0.564846777034167, "grad_norm": 1.0016318213877675, "learning_rate": 8.391498333090647e-06, "loss": 0.8779, "step": 4009 }, { "epoch": 0.5649876717153928, "grad_norm": 1.0511142017405168, "learning_rate": 8.386994309196038e-06, "loss": 0.9093, "step": 4010 }, { "epoch": 0.5651285663966186, "grad_norm": 1.2823781190195858, "learning_rate": 8.382490621235386e-06, "loss": 0.5452, "step": 4011 }, { "epoch": 0.5652694610778443, "grad_norm": 0.928020397445027, "learning_rate": 8.377987270146667e-06, "loss": 0.8347, "step": 4012 }, { "epoch": 0.5654103557590701, "grad_norm": 1.23019785840111, "learning_rate": 8.373484256867766e-06, "loss": 0.5319, "step": 4013 }, { "epoch": 0.5655512504402959, "grad_norm": 1.2422895884595262, "learning_rate": 8.368981582336515e-06, "loss": 0.5852, "step": 4014 }, { "epoch": 0.5656921451215217, "grad_norm": 1.0173705106268958, "learning_rate": 8.36447924749066e-06, "loss": 0.8629, "step": 4015 }, { "epoch": 0.5658330398027475, "grad_norm": 1.092193470570678, "learning_rate": 8.359977253267888e-06, "loss": 0.918, "step": 4016 }, { "epoch": 0.5659739344839733, "grad_norm": 1.1609163522970054, "learning_rate": 8.355475600605808e-06, "loss": 0.4517, "step": 4017 }, { "epoch": 0.5661148291651991, "grad_norm": 1.37437037898976, "learning_rate": 8.350974290441964e-06, "loss": 0.7105, "step": 4018 }, { "epoch": 0.5662557238464248, "grad_norm": 0.9682875398691438, "learning_rate": 8.346473323713817e-06, "loss": 0.8588, "step": 4019 }, { "epoch": 0.5663966185276506, "grad_norm": 1.1321292404514143, "learning_rate": 8.341972701358778e-06, "loss": 0.4471, "step": 4020 }, { "epoch": 0.5665375132088764, "grad_norm": 0.9623294419559997, "learning_rate": 8.337472424314156e-06, "loss": 0.8627, "step": 4021 }, { "epoch": 0.5666784078901022, "grad_norm": 1.2951031866763145, "learning_rate": 8.332972493517223e-06, "loss": 0.5928, "step": 4022 }, { "epoch": 0.566819302571328, "grad_norm": 0.9847791399295057, "learning_rate": 8.328472909905145e-06, "loss": 0.8285, "step": 4023 }, { "epoch": 0.5669601972525538, "grad_norm": 0.9471962343425577, "learning_rate": 8.323973674415042e-06, "loss": 0.8348, "step": 4024 }, { "epoch": 0.5671010919337796, "grad_norm": 0.9724396648923068, "learning_rate": 8.319474787983945e-06, "loss": 0.9139, "step": 4025 }, { "epoch": 0.5672419866150052, "grad_norm": 0.9737610455573714, "learning_rate": 8.314976251548823e-06, "loss": 0.8293, "step": 4026 }, { "epoch": 0.567382881296231, "grad_norm": 0.8988230291091879, "learning_rate": 8.310478066046563e-06, "loss": 0.8029, "step": 4027 }, { "epoch": 0.5675237759774568, "grad_norm": 1.3846929715909262, "learning_rate": 8.305980232413987e-06, "loss": 0.5607, "step": 4028 }, { "epoch": 0.5676646706586826, "grad_norm": 1.2901037824485782, "learning_rate": 8.301482751587836e-06, "loss": 0.6027, "step": 4029 }, { "epoch": 0.5678055653399084, "grad_norm": 1.2361950396150474, "learning_rate": 8.296985624504786e-06, "loss": 0.4484, "step": 4030 }, { "epoch": 0.5679464600211342, "grad_norm": 0.9809545450686583, "learning_rate": 8.29248885210143e-06, "loss": 0.8929, "step": 4031 }, { "epoch": 0.5680873547023599, "grad_norm": 1.3572508376326649, "learning_rate": 8.287992435314297e-06, "loss": 0.4718, "step": 4032 }, { "epoch": 0.5682282493835857, "grad_norm": 1.4644280263273584, "learning_rate": 8.283496375079838e-06, "loss": 0.613, "step": 4033 }, { "epoch": 0.5683691440648115, "grad_norm": 0.8894157621640978, "learning_rate": 8.279000672334418e-06, "loss": 0.8467, "step": 4034 }, { "epoch": 0.5685100387460373, "grad_norm": 1.0789559738906191, "learning_rate": 8.274505328014351e-06, "loss": 0.848, "step": 4035 }, { "epoch": 0.5686509334272631, "grad_norm": 1.2805487506118216, "learning_rate": 8.270010343055856e-06, "loss": 0.4495, "step": 4036 }, { "epoch": 0.5687918281084889, "grad_norm": 1.334457538629049, "learning_rate": 8.26551571839509e-06, "loss": 0.4746, "step": 4037 }, { "epoch": 0.5689327227897147, "grad_norm": 1.2807225814415037, "learning_rate": 8.261021454968123e-06, "loss": 0.5841, "step": 4038 }, { "epoch": 0.5690736174709404, "grad_norm": 1.040884523655601, "learning_rate": 8.256527553710964e-06, "loss": 0.8989, "step": 4039 }, { "epoch": 0.5692145121521662, "grad_norm": 1.2334839929092558, "learning_rate": 8.252034015559529e-06, "loss": 0.4615, "step": 4040 }, { "epoch": 0.569355406833392, "grad_norm": 1.0312854703072905, "learning_rate": 8.247540841449683e-06, "loss": 0.8878, "step": 4041 }, { "epoch": 0.5694963015146178, "grad_norm": 0.9221791549774615, "learning_rate": 8.243048032317185e-06, "loss": 0.8063, "step": 4042 }, { "epoch": 0.5696371961958436, "grad_norm": 1.1791537657560844, "learning_rate": 8.238555589097745e-06, "loss": 0.5903, "step": 4043 }, { "epoch": 0.5697780908770694, "grad_norm": 0.9667023524304481, "learning_rate": 8.23406351272698e-06, "loss": 0.8532, "step": 4044 }, { "epoch": 0.5699189855582951, "grad_norm": 1.0383723406796377, "learning_rate": 8.22957180414044e-06, "loss": 0.8025, "step": 4045 }, { "epoch": 0.5700598802395209, "grad_norm": 1.0757874698408159, "learning_rate": 8.225080464273589e-06, "loss": 0.9003, "step": 4046 }, { "epoch": 0.5702007749207467, "grad_norm": 1.0936089204956336, "learning_rate": 8.220589494061823e-06, "loss": 0.4578, "step": 4047 }, { "epoch": 0.5703416696019725, "grad_norm": 0.9488557821884004, "learning_rate": 8.216098894440457e-06, "loss": 0.8168, "step": 4048 }, { "epoch": 0.5704825642831983, "grad_norm": 0.9685528809546867, "learning_rate": 8.21160866634473e-06, "loss": 0.8509, "step": 4049 }, { "epoch": 0.5706234589644241, "grad_norm": 1.064303473915355, "learning_rate": 8.2071188107098e-06, "loss": 0.841, "step": 4050 }, { "epoch": 0.5707643536456499, "grad_norm": 1.0603070940628974, "learning_rate": 8.202629328470759e-06, "loss": 0.9074, "step": 4051 }, { "epoch": 0.5709052483268756, "grad_norm": 0.929309966796212, "learning_rate": 8.1981402205626e-06, "loss": 0.8714, "step": 4052 }, { "epoch": 0.5710461430081014, "grad_norm": 0.9014960768419701, "learning_rate": 8.193651487920261e-06, "loss": 0.8631, "step": 4053 }, { "epoch": 0.5711870376893272, "grad_norm": 0.8893506296869722, "learning_rate": 8.189163131478589e-06, "loss": 0.8585, "step": 4054 }, { "epoch": 0.571327932370553, "grad_norm": 1.0008906297981894, "learning_rate": 8.184675152172354e-06, "loss": 0.9032, "step": 4055 }, { "epoch": 0.5714688270517788, "grad_norm": 1.2439862942714222, "learning_rate": 8.180187550936248e-06, "loss": 0.4268, "step": 4056 }, { "epoch": 0.5716097217330046, "grad_norm": 1.1103956548083544, "learning_rate": 8.175700328704886e-06, "loss": 0.8798, "step": 4057 }, { "epoch": 0.5717506164142304, "grad_norm": 1.0926248271500507, "learning_rate": 8.171213486412803e-06, "loss": 0.8539, "step": 4058 }, { "epoch": 0.5718915110954561, "grad_norm": 0.9590727039133519, "learning_rate": 8.166727024994456e-06, "loss": 0.8595, "step": 4059 }, { "epoch": 0.5720324057766819, "grad_norm": 1.1224183816656281, "learning_rate": 8.162240945384219e-06, "loss": 0.8883, "step": 4060 }, { "epoch": 0.5721733004579077, "grad_norm": 1.009751584361626, "learning_rate": 8.157755248516394e-06, "loss": 0.9251, "step": 4061 }, { "epoch": 0.5723141951391335, "grad_norm": 1.2781856853834213, "learning_rate": 8.15326993532519e-06, "loss": 0.6215, "step": 4062 }, { "epoch": 0.5724550898203593, "grad_norm": 1.0109326585257383, "learning_rate": 8.148785006744755e-06, "loss": 0.839, "step": 4063 }, { "epoch": 0.5725959845015851, "grad_norm": 1.2830874558360577, "learning_rate": 8.144300463709137e-06, "loss": 0.4844, "step": 4064 }, { "epoch": 0.5727368791828108, "grad_norm": 1.2812418904316405, "learning_rate": 8.139816307152319e-06, "loss": 0.482, "step": 4065 }, { "epoch": 0.5728777738640366, "grad_norm": 1.0892007680516775, "learning_rate": 8.135332538008195e-06, "loss": 0.9414, "step": 4066 }, { "epoch": 0.5730186685452624, "grad_norm": 1.3166093561092478, "learning_rate": 8.130849157210583e-06, "loss": 0.48, "step": 4067 }, { "epoch": 0.5731595632264882, "grad_norm": 1.1851949020567476, "learning_rate": 8.126366165693213e-06, "loss": 0.91, "step": 4068 }, { "epoch": 0.573300457907714, "grad_norm": 0.9855643869658501, "learning_rate": 8.121883564389748e-06, "loss": 0.8881, "step": 4069 }, { "epoch": 0.5734413525889398, "grad_norm": 1.0749062768091326, "learning_rate": 8.117401354233746e-06, "loss": 0.8031, "step": 4070 }, { "epoch": 0.5735822472701656, "grad_norm": 0.974544255860359, "learning_rate": 8.112919536158717e-06, "loss": 0.8712, "step": 4071 }, { "epoch": 0.5737231419513913, "grad_norm": 0.916546408365468, "learning_rate": 8.108438111098055e-06, "loss": 0.9382, "step": 4072 }, { "epoch": 0.5738640366326171, "grad_norm": 1.09661976088996, "learning_rate": 8.103957079985096e-06, "loss": 0.7967, "step": 4073 }, { "epoch": 0.5740049313138429, "grad_norm": 1.0782223332941325, "learning_rate": 8.099476443753082e-06, "loss": 0.9111, "step": 4074 }, { "epoch": 0.5741458259950687, "grad_norm": 1.0657204047690567, "learning_rate": 8.094996203335179e-06, "loss": 0.8808, "step": 4075 }, { "epoch": 0.5742867206762945, "grad_norm": 0.9888151400779255, "learning_rate": 8.090516359664467e-06, "loss": 0.8945, "step": 4076 }, { "epoch": 0.5744276153575203, "grad_norm": 0.8883585013793286, "learning_rate": 8.08603691367394e-06, "loss": 0.8474, "step": 4077 }, { "epoch": 0.574568510038746, "grad_norm": 1.3543153814118052, "learning_rate": 8.081557866296523e-06, "loss": 0.4563, "step": 4078 }, { "epoch": 0.5747094047199718, "grad_norm": 0.9332694805406269, "learning_rate": 8.077079218465039e-06, "loss": 0.8562, "step": 4079 }, { "epoch": 0.5748502994011976, "grad_norm": 1.004357665866166, "learning_rate": 8.072600971112242e-06, "loss": 0.9031, "step": 4080 }, { "epoch": 0.5749911940824234, "grad_norm": 1.0998362370615695, "learning_rate": 8.068123125170793e-06, "loss": 0.8722, "step": 4081 }, { "epoch": 0.5751320887636492, "grad_norm": 1.2440934850156795, "learning_rate": 8.063645681573287e-06, "loss": 0.6549, "step": 4082 }, { "epoch": 0.575272983444875, "grad_norm": 0.9766359158769554, "learning_rate": 8.059168641252207e-06, "loss": 0.7807, "step": 4083 }, { "epoch": 0.5754138781261008, "grad_norm": 0.9760503209272791, "learning_rate": 8.054692005139976e-06, "loss": 0.8205, "step": 4084 }, { "epoch": 0.5755547728073265, "grad_norm": 0.9699918727387887, "learning_rate": 8.050215774168924e-06, "loss": 0.8718, "step": 4085 }, { "epoch": 0.5756956674885523, "grad_norm": 1.0121279773911902, "learning_rate": 8.045739949271294e-06, "loss": 0.8897, "step": 4086 }, { "epoch": 0.5758365621697781, "grad_norm": 1.4395963291576293, "learning_rate": 8.04126453137925e-06, "loss": 0.5943, "step": 4087 }, { "epoch": 0.5759774568510039, "grad_norm": 1.0253186149510025, "learning_rate": 8.036789521424869e-06, "loss": 0.8709, "step": 4088 }, { "epoch": 0.5761183515322297, "grad_norm": 1.346237213870475, "learning_rate": 8.032314920340138e-06, "loss": 0.4534, "step": 4089 }, { "epoch": 0.5762592462134555, "grad_norm": 1.536910195328434, "learning_rate": 8.027840729056971e-06, "loss": 0.5715, "step": 4090 }, { "epoch": 0.5764001408946813, "grad_norm": 1.2407601744829655, "learning_rate": 8.02336694850718e-06, "loss": 0.6081, "step": 4091 }, { "epoch": 0.576541035575907, "grad_norm": 1.0467537199283448, "learning_rate": 8.01889357962251e-06, "loss": 0.8699, "step": 4092 }, { "epoch": 0.5766819302571328, "grad_norm": 1.0524789176983897, "learning_rate": 8.014420623334602e-06, "loss": 0.8428, "step": 4093 }, { "epoch": 0.5768228249383586, "grad_norm": 0.9381401247548657, "learning_rate": 8.009948080575028e-06, "loss": 0.8336, "step": 4094 }, { "epoch": 0.5769637196195844, "grad_norm": 1.2330707130339513, "learning_rate": 8.00547595227526e-06, "loss": 0.4625, "step": 4095 }, { "epoch": 0.5771046143008102, "grad_norm": 1.0172044058800611, "learning_rate": 8.001004239366694e-06, "loss": 0.8393, "step": 4096 }, { "epoch": 0.577245508982036, "grad_norm": 1.2361408445570365, "learning_rate": 7.99653294278063e-06, "loss": 0.5348, "step": 4097 }, { "epoch": 0.5773864036632617, "grad_norm": 1.260158217625257, "learning_rate": 7.99206206344829e-06, "loss": 0.4769, "step": 4098 }, { "epoch": 0.5775272983444875, "grad_norm": 0.930915225935335, "learning_rate": 7.987591602300802e-06, "loss": 0.8508, "step": 4099 }, { "epoch": 0.5776681930257133, "grad_norm": 1.3436762320388176, "learning_rate": 7.983121560269219e-06, "loss": 0.4682, "step": 4100 }, { "epoch": 0.5778090877069391, "grad_norm": 1.01198408118046, "learning_rate": 7.978651938284486e-06, "loss": 0.8668, "step": 4101 }, { "epoch": 0.5779499823881649, "grad_norm": 1.0879095407220392, "learning_rate": 7.974182737277479e-06, "loss": 0.7925, "step": 4102 }, { "epoch": 0.5780908770693907, "grad_norm": 1.1552856939515497, "learning_rate": 7.969713958178979e-06, "loss": 0.8475, "step": 4103 }, { "epoch": 0.5782317717506165, "grad_norm": 0.9934879936342953, "learning_rate": 7.965245601919682e-06, "loss": 0.8983, "step": 4104 }, { "epoch": 0.5783726664318422, "grad_norm": 1.1444451223702343, "learning_rate": 7.960777669430188e-06, "loss": 0.8187, "step": 4105 }, { "epoch": 0.578513561113068, "grad_norm": 1.1344150538714752, "learning_rate": 7.956310161641023e-06, "loss": 0.8902, "step": 4106 }, { "epoch": 0.5786544557942938, "grad_norm": 1.1581630510618666, "learning_rate": 7.951843079482609e-06, "loss": 0.5274, "step": 4107 }, { "epoch": 0.5787953504755196, "grad_norm": 1.237509011019804, "learning_rate": 7.947376423885291e-06, "loss": 0.4234, "step": 4108 }, { "epoch": 0.5789362451567454, "grad_norm": 1.1373438480674773, "learning_rate": 7.942910195779313e-06, "loss": 0.8677, "step": 4109 }, { "epoch": 0.5790771398379712, "grad_norm": 0.9067552733694138, "learning_rate": 7.938444396094851e-06, "loss": 0.8718, "step": 4110 }, { "epoch": 0.5792180345191968, "grad_norm": 1.0035602136624735, "learning_rate": 7.933979025761967e-06, "loss": 0.8971, "step": 4111 }, { "epoch": 0.5793589292004226, "grad_norm": 1.400687141118707, "learning_rate": 7.92951408571065e-06, "loss": 0.572, "step": 4112 }, { "epoch": 0.5794998238816484, "grad_norm": 1.0872424314187044, "learning_rate": 7.925049576870793e-06, "loss": 0.8446, "step": 4113 }, { "epoch": 0.5796407185628742, "grad_norm": 1.0245145119965358, "learning_rate": 7.920585500172204e-06, "loss": 0.8182, "step": 4114 }, { "epoch": 0.5797816132441, "grad_norm": 0.8596284533150531, "learning_rate": 7.916121856544591e-06, "loss": 0.8185, "step": 4115 }, { "epoch": 0.5799225079253258, "grad_norm": 0.9016829302476614, "learning_rate": 7.911658646917586e-06, "loss": 0.8874, "step": 4116 }, { "epoch": 0.5800634026065516, "grad_norm": 1.4249414377927432, "learning_rate": 7.90719587222072e-06, "loss": 0.6961, "step": 4117 }, { "epoch": 0.5802042972877773, "grad_norm": 0.9142520612922098, "learning_rate": 7.902733533383432e-06, "loss": 0.8536, "step": 4118 }, { "epoch": 0.5803451919690031, "grad_norm": 0.9408532408017584, "learning_rate": 7.89827163133508e-06, "loss": 0.8439, "step": 4119 }, { "epoch": 0.5804860866502289, "grad_norm": 0.9177717380066968, "learning_rate": 7.893810167004922e-06, "loss": 0.8477, "step": 4120 }, { "epoch": 0.5806269813314547, "grad_norm": 0.8897139288721346, "learning_rate": 7.889349141322136e-06, "loss": 0.8722, "step": 4121 }, { "epoch": 0.5807678760126805, "grad_norm": 1.0824499956906004, "learning_rate": 7.884888555215789e-06, "loss": 0.8905, "step": 4122 }, { "epoch": 0.5809087706939063, "grad_norm": 1.1471928206122215, "learning_rate": 7.880428409614879e-06, "loss": 0.8853, "step": 4123 }, { "epoch": 0.5810496653751321, "grad_norm": 1.1084273614481799, "learning_rate": 7.875968705448298e-06, "loss": 0.8623, "step": 4124 }, { "epoch": 0.5811905600563578, "grad_norm": 1.0282007154014183, "learning_rate": 7.871509443644852e-06, "loss": 0.8524, "step": 4125 }, { "epoch": 0.5813314547375836, "grad_norm": 1.1020717085589828, "learning_rate": 7.86705062513325e-06, "loss": 0.8831, "step": 4126 }, { "epoch": 0.5814723494188094, "grad_norm": 0.9256984602852155, "learning_rate": 7.862592250842114e-06, "loss": 0.8821, "step": 4127 }, { "epoch": 0.5816132441000352, "grad_norm": 1.0558659486415458, "learning_rate": 7.858134321699969e-06, "loss": 0.8423, "step": 4128 }, { "epoch": 0.581754138781261, "grad_norm": 1.2538206674402232, "learning_rate": 7.853676838635251e-06, "loss": 0.6066, "step": 4129 }, { "epoch": 0.5818950334624868, "grad_norm": 1.012296925280977, "learning_rate": 7.849219802576296e-06, "loss": 0.8191, "step": 4130 }, { "epoch": 0.5820359281437125, "grad_norm": 1.5752279591835634, "learning_rate": 7.844763214451365e-06, "loss": 0.6465, "step": 4131 }, { "epoch": 0.5821768228249383, "grad_norm": 1.0093105121523862, "learning_rate": 7.840307075188595e-06, "loss": 0.8925, "step": 4132 }, { "epoch": 0.5823177175061641, "grad_norm": 1.0846465198086253, "learning_rate": 7.835851385716065e-06, "loss": 0.8776, "step": 4133 }, { "epoch": 0.5824586121873899, "grad_norm": 0.9633717998047493, "learning_rate": 7.83139614696173e-06, "loss": 0.8918, "step": 4134 }, { "epoch": 0.5825995068686157, "grad_norm": 0.9904502462844138, "learning_rate": 7.826941359853476e-06, "loss": 0.839, "step": 4135 }, { "epoch": 0.5827404015498415, "grad_norm": 1.0603377163383332, "learning_rate": 7.82248702531907e-06, "loss": 0.8912, "step": 4136 }, { "epoch": 0.5828812962310673, "grad_norm": 0.925066909390809, "learning_rate": 7.818033144286209e-06, "loss": 0.7768, "step": 4137 }, { "epoch": 0.583022190912293, "grad_norm": 1.3557709062320626, "learning_rate": 7.813579717682473e-06, "loss": 0.4508, "step": 4138 }, { "epoch": 0.5831630855935188, "grad_norm": 1.0492153009177938, "learning_rate": 7.80912674643537e-06, "loss": 0.8817, "step": 4139 }, { "epoch": 0.5833039802747446, "grad_norm": 1.081216077990175, "learning_rate": 7.80467423147229e-06, "loss": 0.8983, "step": 4140 }, { "epoch": 0.5834448749559704, "grad_norm": 1.017260095654136, "learning_rate": 7.800222173720552e-06, "loss": 0.8678, "step": 4141 }, { "epoch": 0.5835857696371962, "grad_norm": 1.0605051749650112, "learning_rate": 7.795770574107358e-06, "loss": 0.896, "step": 4142 }, { "epoch": 0.583726664318422, "grad_norm": 1.411006279799574, "learning_rate": 7.79131943355983e-06, "loss": 0.5083, "step": 4143 }, { "epoch": 0.5838675589996478, "grad_norm": 1.0243271036170412, "learning_rate": 7.786868753004984e-06, "loss": 0.7983, "step": 4144 }, { "epoch": 0.5840084536808735, "grad_norm": 1.0769466529496774, "learning_rate": 7.782418533369748e-06, "loss": 0.8487, "step": 4145 }, { "epoch": 0.5841493483620993, "grad_norm": 1.2974791918879303, "learning_rate": 7.77796877558095e-06, "loss": 0.5187, "step": 4146 }, { "epoch": 0.5842902430433251, "grad_norm": 1.2637293642664666, "learning_rate": 7.77351948056532e-06, "loss": 0.7064, "step": 4147 }, { "epoch": 0.5844311377245509, "grad_norm": 0.9594240837934679, "learning_rate": 7.769070649249497e-06, "loss": 0.8565, "step": 4148 }, { "epoch": 0.5845720324057767, "grad_norm": 1.3084992162964368, "learning_rate": 7.764622282560019e-06, "loss": 0.5394, "step": 4149 }, { "epoch": 0.5847129270870025, "grad_norm": 1.1362363996257947, "learning_rate": 7.760174381423328e-06, "loss": 0.8195, "step": 4150 }, { "epoch": 0.5848538217682282, "grad_norm": 0.8754784177994597, "learning_rate": 7.755726946765773e-06, "loss": 0.8651, "step": 4151 }, { "epoch": 0.584994716449454, "grad_norm": 1.0901093910049988, "learning_rate": 7.7512799795136e-06, "loss": 0.8903, "step": 4152 }, { "epoch": 0.5851356111306798, "grad_norm": 0.9050493022025358, "learning_rate": 7.746833480592962e-06, "loss": 0.8627, "step": 4153 }, { "epoch": 0.5852765058119056, "grad_norm": 1.004482742727352, "learning_rate": 7.74238745092991e-06, "loss": 0.8597, "step": 4154 }, { "epoch": 0.5854174004931314, "grad_norm": 1.0108653921759616, "learning_rate": 7.737941891450406e-06, "loss": 0.8193, "step": 4155 }, { "epoch": 0.5855582951743572, "grad_norm": 1.0744842089627205, "learning_rate": 7.7334968030803e-06, "loss": 0.8126, "step": 4156 }, { "epoch": 0.585699189855583, "grad_norm": 1.1672952773901317, "learning_rate": 7.729052186745358e-06, "loss": 0.5292, "step": 4157 }, { "epoch": 0.5858400845368087, "grad_norm": 1.0434846169085634, "learning_rate": 7.724608043371235e-06, "loss": 0.8266, "step": 4158 }, { "epoch": 0.5859809792180345, "grad_norm": 0.9228483824614808, "learning_rate": 7.720164373883507e-06, "loss": 0.8617, "step": 4159 }, { "epoch": 0.5861218738992603, "grad_norm": 1.2390449824193757, "learning_rate": 7.715721179207627e-06, "loss": 0.4517, "step": 4160 }, { "epoch": 0.5862627685804861, "grad_norm": 1.3680842750644304, "learning_rate": 7.711278460268959e-06, "loss": 0.5559, "step": 4161 }, { "epoch": 0.5864036632617119, "grad_norm": 0.9326147191434102, "learning_rate": 7.706836217992783e-06, "loss": 0.7678, "step": 4162 }, { "epoch": 0.5865445579429377, "grad_norm": 0.918318741521578, "learning_rate": 7.702394453304249e-06, "loss": 0.8618, "step": 4163 }, { "epoch": 0.5866854526241634, "grad_norm": 1.1298997018291674, "learning_rate": 7.69795316712844e-06, "loss": 0.9268, "step": 4164 }, { "epoch": 0.5868263473053892, "grad_norm": 1.1661652145639365, "learning_rate": 7.693512360390315e-06, "loss": 0.6135, "step": 4165 }, { "epoch": 0.586967241986615, "grad_norm": 0.9149976846952869, "learning_rate": 7.689072034014747e-06, "loss": 0.8406, "step": 4166 }, { "epoch": 0.5871081366678408, "grad_norm": 0.9662148794476461, "learning_rate": 7.6846321889265e-06, "loss": 0.8508, "step": 4167 }, { "epoch": 0.5872490313490666, "grad_norm": 1.3742947636110272, "learning_rate": 7.680192826050245e-06, "loss": 0.4838, "step": 4168 }, { "epoch": 0.5873899260302924, "grad_norm": 1.450880582766079, "learning_rate": 7.675753946310548e-06, "loss": 0.6272, "step": 4169 }, { "epoch": 0.5875308207115182, "grad_norm": 0.9316046264512049, "learning_rate": 7.671315550631879e-06, "loss": 0.7675, "step": 4170 }, { "epoch": 0.5876717153927439, "grad_norm": 1.0041071424815595, "learning_rate": 7.666877639938597e-06, "loss": 0.9119, "step": 4171 }, { "epoch": 0.5878126100739697, "grad_norm": 0.9831592550453717, "learning_rate": 7.66244021515498e-06, "loss": 0.8304, "step": 4172 }, { "epoch": 0.5879535047551955, "grad_norm": 0.9793410333366089, "learning_rate": 7.658003277205176e-06, "loss": 0.9405, "step": 4173 }, { "epoch": 0.5880943994364213, "grad_norm": 0.92584792268049, "learning_rate": 7.653566827013262e-06, "loss": 0.8208, "step": 4174 }, { "epoch": 0.5882352941176471, "grad_norm": 0.878118042686169, "learning_rate": 7.649130865503186e-06, "loss": 0.8946, "step": 4175 }, { "epoch": 0.5883761887988729, "grad_norm": 1.304849707345746, "learning_rate": 7.64469539359882e-06, "loss": 0.4647, "step": 4176 }, { "epoch": 0.5885170834800987, "grad_norm": 1.1447975460720166, "learning_rate": 7.640260412223913e-06, "loss": 0.5168, "step": 4177 }, { "epoch": 0.5886579781613244, "grad_norm": 0.9006309803716344, "learning_rate": 7.635825922302122e-06, "loss": 0.836, "step": 4178 }, { "epoch": 0.5887988728425502, "grad_norm": 0.9096724430032067, "learning_rate": 7.631391924756995e-06, "loss": 0.8328, "step": 4179 }, { "epoch": 0.588939767523776, "grad_norm": 0.9894198843188946, "learning_rate": 7.626958420511994e-06, "loss": 0.864, "step": 4180 }, { "epoch": 0.5890806622050018, "grad_norm": 1.048829197116692, "learning_rate": 7.622525410490453e-06, "loss": 0.8849, "step": 4181 }, { "epoch": 0.5892215568862276, "grad_norm": 1.2456644470838867, "learning_rate": 7.618092895615627e-06, "loss": 0.4931, "step": 4182 }, { "epoch": 0.5893624515674534, "grad_norm": 1.3534162925945654, "learning_rate": 7.6136608768106525e-06, "loss": 0.5885, "step": 4183 }, { "epoch": 0.5895033462486791, "grad_norm": 1.0599600930271322, "learning_rate": 7.609229354998568e-06, "loss": 0.822, "step": 4184 }, { "epoch": 0.5896442409299049, "grad_norm": 0.8945525353242952, "learning_rate": 7.604798331102308e-06, "loss": 0.872, "step": 4185 }, { "epoch": 0.5897851356111307, "grad_norm": 1.0429475908758306, "learning_rate": 7.600367806044706e-06, "loss": 0.8777, "step": 4186 }, { "epoch": 0.5899260302923565, "grad_norm": 1.3614024206681972, "learning_rate": 7.595937780748484e-06, "loss": 0.5654, "step": 4187 }, { "epoch": 0.5900669249735823, "grad_norm": 1.0843251702225134, "learning_rate": 7.5915082561362685e-06, "loss": 0.863, "step": 4188 }, { "epoch": 0.5902078196548081, "grad_norm": 1.0058192904621128, "learning_rate": 7.587079233130573e-06, "loss": 0.895, "step": 4189 }, { "epoch": 0.5903487143360339, "grad_norm": 1.0906587937907228, "learning_rate": 7.582650712653823e-06, "loss": 0.8481, "step": 4190 }, { "epoch": 0.5904896090172596, "grad_norm": 0.9930818790510136, "learning_rate": 7.578222695628314e-06, "loss": 0.8082, "step": 4191 }, { "epoch": 0.5906305036984854, "grad_norm": 1.2654085773930026, "learning_rate": 7.573795182976261e-06, "loss": 0.4568, "step": 4192 }, { "epoch": 0.5907713983797112, "grad_norm": 1.0637201474785665, "learning_rate": 7.569368175619759e-06, "loss": 0.5064, "step": 4193 }, { "epoch": 0.590912293060937, "grad_norm": 1.0267186521993459, "learning_rate": 7.564941674480804e-06, "loss": 0.8696, "step": 4194 }, { "epoch": 0.5910531877421628, "grad_norm": 1.1290077359995003, "learning_rate": 7.560515680481283e-06, "loss": 0.8786, "step": 4195 }, { "epoch": 0.5911940824233886, "grad_norm": 1.0265765326727219, "learning_rate": 7.556090194542982e-06, "loss": 0.8577, "step": 4196 }, { "epoch": 0.5913349771046142, "grad_norm": 1.2181827610038756, "learning_rate": 7.551665217587576e-06, "loss": 0.5442, "step": 4197 }, { "epoch": 0.59147587178584, "grad_norm": 1.1690807987920315, "learning_rate": 7.547240750536639e-06, "loss": 0.8847, "step": 4198 }, { "epoch": 0.5916167664670658, "grad_norm": 1.0958514765911853, "learning_rate": 7.542816794311629e-06, "loss": 0.8159, "step": 4199 }, { "epoch": 0.5917576611482916, "grad_norm": 1.2571996017791234, "learning_rate": 7.538393349833919e-06, "loss": 0.4762, "step": 4200 }, { "epoch": 0.5918985558295174, "grad_norm": 1.1087230263485528, "learning_rate": 7.53397041802475e-06, "loss": 0.8727, "step": 4201 }, { "epoch": 0.5920394505107432, "grad_norm": 0.9686477998374502, "learning_rate": 7.529547999805266e-06, "loss": 0.9306, "step": 4202 }, { "epoch": 0.592180345191969, "grad_norm": 0.967171667685686, "learning_rate": 7.525126096096518e-06, "loss": 0.886, "step": 4203 }, { "epoch": 0.5923212398731947, "grad_norm": 0.9826991972914172, "learning_rate": 7.5207047078194214e-06, "loss": 0.8234, "step": 4204 }, { "epoch": 0.5924621345544205, "grad_norm": 1.0335894618633823, "learning_rate": 7.5162838358948135e-06, "loss": 0.8397, "step": 4205 }, { "epoch": 0.5926030292356463, "grad_norm": 1.0124762664270126, "learning_rate": 7.511863481243403e-06, "loss": 0.8404, "step": 4206 }, { "epoch": 0.5927439239168721, "grad_norm": 0.9225376630215633, "learning_rate": 7.507443644785804e-06, "loss": 0.8107, "step": 4207 }, { "epoch": 0.5928848185980979, "grad_norm": 1.326079767949769, "learning_rate": 7.503024327442513e-06, "loss": 0.5477, "step": 4208 }, { "epoch": 0.5930257132793237, "grad_norm": 1.1017857766829524, "learning_rate": 7.498605530133927e-06, "loss": 0.8812, "step": 4209 }, { "epoch": 0.5931666079605495, "grad_norm": 1.0934255830046018, "learning_rate": 7.494187253780323e-06, "loss": 0.81, "step": 4210 }, { "epoch": 0.5933075026417752, "grad_norm": 1.3616150600261319, "learning_rate": 7.489769499301889e-06, "loss": 0.5708, "step": 4211 }, { "epoch": 0.593448397323001, "grad_norm": 1.0177509917208833, "learning_rate": 7.485352267618679e-06, "loss": 0.8152, "step": 4212 }, { "epoch": 0.5935892920042268, "grad_norm": 1.0531162103758447, "learning_rate": 7.480935559650662e-06, "loss": 0.9107, "step": 4213 }, { "epoch": 0.5937301866854526, "grad_norm": 0.9667688676077486, "learning_rate": 7.47651937631768e-06, "loss": 0.8581, "step": 4214 }, { "epoch": 0.5938710813666784, "grad_norm": 1.017630282551361, "learning_rate": 7.47210371853948e-06, "loss": 0.8313, "step": 4215 }, { "epoch": 0.5940119760479042, "grad_norm": 1.1621873073409852, "learning_rate": 7.467688587235686e-06, "loss": 0.8664, "step": 4216 }, { "epoch": 0.5941528707291299, "grad_norm": 1.0507630797468615, "learning_rate": 7.463273983325823e-06, "loss": 0.8505, "step": 4217 }, { "epoch": 0.5942937654103557, "grad_norm": 1.2536449915003678, "learning_rate": 7.4588599077293e-06, "loss": 0.4305, "step": 4218 }, { "epoch": 0.5944346600915815, "grad_norm": 1.2157555323909472, "learning_rate": 7.454446361365422e-06, "loss": 0.491, "step": 4219 }, { "epoch": 0.5945755547728073, "grad_norm": 0.9153195387069742, "learning_rate": 7.450033345153371e-06, "loss": 0.8707, "step": 4220 }, { "epoch": 0.5947164494540331, "grad_norm": 0.9225107997429633, "learning_rate": 7.445620860012239e-06, "loss": 0.8476, "step": 4221 }, { "epoch": 0.5948573441352589, "grad_norm": 0.9633580615786221, "learning_rate": 7.441208906860986e-06, "loss": 0.8726, "step": 4222 }, { "epoch": 0.5949982388164847, "grad_norm": 1.0069484658265775, "learning_rate": 7.436797486618479e-06, "loss": 0.865, "step": 4223 }, { "epoch": 0.5951391334977104, "grad_norm": 1.2342257407946007, "learning_rate": 7.43238660020346e-06, "loss": 0.5731, "step": 4224 }, { "epoch": 0.5952800281789362, "grad_norm": 1.2648399985034302, "learning_rate": 7.427976248534569e-06, "loss": 0.4649, "step": 4225 }, { "epoch": 0.595420922860162, "grad_norm": 1.0529671274111267, "learning_rate": 7.423566432530331e-06, "loss": 0.8548, "step": 4226 }, { "epoch": 0.5955618175413878, "grad_norm": 0.9682549714896025, "learning_rate": 7.419157153109161e-06, "loss": 0.889, "step": 4227 }, { "epoch": 0.5957027122226136, "grad_norm": 0.9719709808545482, "learning_rate": 7.414748411189358e-06, "loss": 0.8572, "step": 4228 }, { "epoch": 0.5958436069038394, "grad_norm": 1.0477651414158926, "learning_rate": 7.410340207689116e-06, "loss": 0.8668, "step": 4229 }, { "epoch": 0.5959845015850652, "grad_norm": 0.9006252069776609, "learning_rate": 7.405932543526509e-06, "loss": 0.8324, "step": 4230 }, { "epoch": 0.5961253962662909, "grad_norm": 1.415108717301392, "learning_rate": 7.40152541961951e-06, "loss": 0.5721, "step": 4231 }, { "epoch": 0.5962662909475167, "grad_norm": 1.1768457679051885, "learning_rate": 7.397118836885962e-06, "loss": 0.5253, "step": 4232 }, { "epoch": 0.5964071856287425, "grad_norm": 1.0242666534101086, "learning_rate": 7.392712796243614e-06, "loss": 0.9224, "step": 4233 }, { "epoch": 0.5965480803099683, "grad_norm": 0.97511314854284, "learning_rate": 7.388307298610089e-06, "loss": 0.8631, "step": 4234 }, { "epoch": 0.5966889749911941, "grad_norm": 1.4286524078005458, "learning_rate": 7.383902344902906e-06, "loss": 0.5845, "step": 4235 }, { "epoch": 0.5968298696724199, "grad_norm": 1.2639844773700417, "learning_rate": 7.379497936039464e-06, "loss": 0.4496, "step": 4236 }, { "epoch": 0.5969707643536456, "grad_norm": 0.9961348099893367, "learning_rate": 7.37509407293705e-06, "loss": 0.8606, "step": 4237 }, { "epoch": 0.5971116590348714, "grad_norm": 1.4700102582552954, "learning_rate": 7.370690756512835e-06, "loss": 0.6107, "step": 4238 }, { "epoch": 0.5972525537160972, "grad_norm": 1.2323898002889295, "learning_rate": 7.36628798768389e-06, "loss": 0.5609, "step": 4239 }, { "epoch": 0.597393448397323, "grad_norm": 0.9716029237516649, "learning_rate": 7.361885767367148e-06, "loss": 0.8348, "step": 4240 }, { "epoch": 0.5975343430785488, "grad_norm": 1.1302952170318525, "learning_rate": 7.357484096479452e-06, "loss": 0.4766, "step": 4241 }, { "epoch": 0.5976752377597746, "grad_norm": 0.9538272634647477, "learning_rate": 7.3530829759375136e-06, "loss": 0.8649, "step": 4242 }, { "epoch": 0.5978161324410004, "grad_norm": 1.3274599919973575, "learning_rate": 7.348682406657939e-06, "loss": 0.5847, "step": 4243 }, { "epoch": 0.5979570271222261, "grad_norm": 0.9201814849988141, "learning_rate": 7.3442823895572135e-06, "loss": 0.8327, "step": 4244 }, { "epoch": 0.5980979218034519, "grad_norm": 1.0004156859713667, "learning_rate": 7.339882925551711e-06, "loss": 0.8556, "step": 4245 }, { "epoch": 0.5982388164846777, "grad_norm": 1.2296024753220385, "learning_rate": 7.335484015557692e-06, "loss": 0.5628, "step": 4246 }, { "epoch": 0.5983797111659035, "grad_norm": 1.0088212461732222, "learning_rate": 7.331085660491295e-06, "loss": 0.9434, "step": 4247 }, { "epoch": 0.5985206058471293, "grad_norm": 0.9724739638184909, "learning_rate": 7.326687861268551e-06, "loss": 0.871, "step": 4248 }, { "epoch": 0.5986615005283551, "grad_norm": 0.9018900021237788, "learning_rate": 7.322290618805368e-06, "loss": 0.8467, "step": 4249 }, { "epoch": 0.5988023952095808, "grad_norm": 1.3806765898178721, "learning_rate": 7.317893934017544e-06, "loss": 0.6529, "step": 4250 }, { "epoch": 0.5989432898908066, "grad_norm": 1.256289945880099, "learning_rate": 7.313497807820753e-06, "loss": 0.4384, "step": 4251 }, { "epoch": 0.5990841845720324, "grad_norm": 1.0891067717594571, "learning_rate": 7.309102241130569e-06, "loss": 0.8521, "step": 4252 }, { "epoch": 0.5992250792532582, "grad_norm": 1.0677732643962998, "learning_rate": 7.304707234862425e-06, "loss": 0.8921, "step": 4253 }, { "epoch": 0.599365973934484, "grad_norm": 1.3345421536788462, "learning_rate": 7.300312789931661e-06, "loss": 0.4516, "step": 4254 }, { "epoch": 0.5995068686157098, "grad_norm": 0.9788342931569605, "learning_rate": 7.295918907253483e-06, "loss": 0.8724, "step": 4255 }, { "epoch": 0.5996477632969356, "grad_norm": 1.1830303048514246, "learning_rate": 7.291525587742991e-06, "loss": 0.4683, "step": 4256 }, { "epoch": 0.5997886579781613, "grad_norm": 0.9751272844395835, "learning_rate": 7.28713283231516e-06, "loss": 0.8844, "step": 4257 }, { "epoch": 0.5999295526593871, "grad_norm": 0.9848815232164195, "learning_rate": 7.282740641884855e-06, "loss": 0.8576, "step": 4258 }, { "epoch": 0.6000704473406129, "grad_norm": 1.3376372847543843, "learning_rate": 7.278349017366814e-06, "loss": 0.484, "step": 4259 }, { "epoch": 0.6002113420218387, "grad_norm": 1.2307252810697584, "learning_rate": 7.273957959675666e-06, "loss": 0.4417, "step": 4260 }, { "epoch": 0.6003522367030645, "grad_norm": 1.069513338399321, "learning_rate": 7.2695674697259135e-06, "loss": 0.9217, "step": 4261 }, { "epoch": 0.6004931313842903, "grad_norm": 1.1326439092779277, "learning_rate": 7.265177548431954e-06, "loss": 0.8934, "step": 4262 }, { "epoch": 0.6006340260655161, "grad_norm": 1.0789595457108423, "learning_rate": 7.260788196708051e-06, "loss": 0.8639, "step": 4263 }, { "epoch": 0.6007749207467418, "grad_norm": 0.8991970692379165, "learning_rate": 7.256399415468361e-06, "loss": 0.8148, "step": 4264 }, { "epoch": 0.6009158154279676, "grad_norm": 1.0604367333169786, "learning_rate": 7.252011205626911e-06, "loss": 0.8771, "step": 4265 }, { "epoch": 0.6010567101091934, "grad_norm": 0.924393353680361, "learning_rate": 7.247623568097621e-06, "loss": 0.7872, "step": 4266 }, { "epoch": 0.6011976047904192, "grad_norm": 1.05601835699746, "learning_rate": 7.243236503794284e-06, "loss": 0.9281, "step": 4267 }, { "epoch": 0.601338499471645, "grad_norm": 1.0088256239166296, "learning_rate": 7.238850013630576e-06, "loss": 0.8831, "step": 4268 }, { "epoch": 0.6014793941528708, "grad_norm": 1.0601808600360334, "learning_rate": 7.23446409852005e-06, "loss": 0.8574, "step": 4269 }, { "epoch": 0.6016202888340965, "grad_norm": 0.9928840668112944, "learning_rate": 7.230078759376149e-06, "loss": 0.8682, "step": 4270 }, { "epoch": 0.6017611835153223, "grad_norm": 1.3072377697696693, "learning_rate": 7.22569399711218e-06, "loss": 0.513, "step": 4271 }, { "epoch": 0.6019020781965481, "grad_norm": 1.2533625419191052, "learning_rate": 7.221309812641348e-06, "loss": 0.4786, "step": 4272 }, { "epoch": 0.6020429728777739, "grad_norm": 1.070079561837356, "learning_rate": 7.216926206876723e-06, "loss": 0.8514, "step": 4273 }, { "epoch": 0.6021838675589997, "grad_norm": 0.9931675831976299, "learning_rate": 7.212543180731264e-06, "loss": 0.8723, "step": 4274 }, { "epoch": 0.6023247622402255, "grad_norm": 1.3044929390664253, "learning_rate": 7.208160735117801e-06, "loss": 0.5407, "step": 4275 }, { "epoch": 0.6024656569214513, "grad_norm": 1.5592030886971282, "learning_rate": 7.203778870949054e-06, "loss": 0.6058, "step": 4276 }, { "epoch": 0.602606551602677, "grad_norm": 1.0451872309488828, "learning_rate": 7.19939758913761e-06, "loss": 0.8602, "step": 4277 }, { "epoch": 0.6027474462839028, "grad_norm": 1.2595411964927565, "learning_rate": 7.195016890595944e-06, "loss": 0.8724, "step": 4278 }, { "epoch": 0.6028883409651286, "grad_norm": 1.2975390089452745, "learning_rate": 7.190636776236399e-06, "loss": 0.5623, "step": 4279 }, { "epoch": 0.6030292356463544, "grad_norm": 1.0027242164991654, "learning_rate": 7.186257246971216e-06, "loss": 0.816, "step": 4280 }, { "epoch": 0.6031701303275802, "grad_norm": 0.9456151623184322, "learning_rate": 7.181878303712486e-06, "loss": 0.8555, "step": 4281 }, { "epoch": 0.603311025008806, "grad_norm": 0.9511758077645242, "learning_rate": 7.177499947372206e-06, "loss": 0.8937, "step": 4282 }, { "epoch": 0.6034519196900316, "grad_norm": 1.0869308933269386, "learning_rate": 7.173122178862229e-06, "loss": 0.8901, "step": 4283 }, { "epoch": 0.6035928143712574, "grad_norm": 0.9416051010421922, "learning_rate": 7.168744999094302e-06, "loss": 0.8587, "step": 4284 }, { "epoch": 0.6037337090524832, "grad_norm": 1.0636999057061196, "learning_rate": 7.1643684089800384e-06, "loss": 0.8465, "step": 4285 }, { "epoch": 0.603874603733709, "grad_norm": 0.9993511282911873, "learning_rate": 7.159992409430929e-06, "loss": 0.8309, "step": 4286 }, { "epoch": 0.6040154984149348, "grad_norm": 1.0614527676652077, "learning_rate": 7.155617001358351e-06, "loss": 0.8623, "step": 4287 }, { "epoch": 0.6041563930961606, "grad_norm": 1.022603466447665, "learning_rate": 7.151242185673548e-06, "loss": 0.864, "step": 4288 }, { "epoch": 0.6042972877773864, "grad_norm": 1.1731014788105942, "learning_rate": 7.146867963287647e-06, "loss": 0.5689, "step": 4289 }, { "epoch": 0.6044381824586121, "grad_norm": 0.9226493698319669, "learning_rate": 7.1424943351116475e-06, "loss": 0.8545, "step": 4290 }, { "epoch": 0.6045790771398379, "grad_norm": 1.0185651998896654, "learning_rate": 7.138121302056429e-06, "loss": 0.8339, "step": 4291 }, { "epoch": 0.6047199718210637, "grad_norm": 1.0322459439031255, "learning_rate": 7.133748865032739e-06, "loss": 0.9252, "step": 4292 }, { "epoch": 0.6048608665022895, "grad_norm": 0.9143163668051456, "learning_rate": 7.129377024951216e-06, "loss": 0.8232, "step": 4293 }, { "epoch": 0.6050017611835153, "grad_norm": 0.8940980822545734, "learning_rate": 7.125005782722356e-06, "loss": 0.8354, "step": 4294 }, { "epoch": 0.6051426558647411, "grad_norm": 1.2448615831367227, "learning_rate": 7.120635139256547e-06, "loss": 0.4584, "step": 4295 }, { "epoch": 0.6052835505459669, "grad_norm": 1.3443232078787817, "learning_rate": 7.116265095464039e-06, "loss": 0.5729, "step": 4296 }, { "epoch": 0.6054244452271926, "grad_norm": 1.1876049547797431, "learning_rate": 7.1118956522549674e-06, "loss": 0.4976, "step": 4297 }, { "epoch": 0.6055653399084184, "grad_norm": 0.9181194809408911, "learning_rate": 7.107526810539333e-06, "loss": 0.8682, "step": 4298 }, { "epoch": 0.6057062345896442, "grad_norm": 0.9769411322464636, "learning_rate": 7.10315857122702e-06, "loss": 0.8708, "step": 4299 }, { "epoch": 0.60584712927087, "grad_norm": 1.0082687364816207, "learning_rate": 7.098790935227778e-06, "loss": 0.859, "step": 4300 }, { "epoch": 0.6059880239520958, "grad_norm": 1.0368299647155255, "learning_rate": 7.094423903451246e-06, "loss": 0.8723, "step": 4301 }, { "epoch": 0.6061289186333216, "grad_norm": 1.2557305901444313, "learning_rate": 7.090057476806916e-06, "loss": 0.567, "step": 4302 }, { "epoch": 0.6062698133145473, "grad_norm": 1.2492490704150692, "learning_rate": 7.085691656204176e-06, "loss": 0.5086, "step": 4303 }, { "epoch": 0.6064107079957731, "grad_norm": 1.2035658492279229, "learning_rate": 7.08132644255227e-06, "loss": 0.9132, "step": 4304 }, { "epoch": 0.6065516026769989, "grad_norm": 1.0603868398684693, "learning_rate": 7.076961836760327e-06, "loss": 0.8548, "step": 4305 }, { "epoch": 0.6066924973582247, "grad_norm": 0.8887116925425921, "learning_rate": 7.072597839737341e-06, "loss": 0.7278, "step": 4306 }, { "epoch": 0.6068333920394505, "grad_norm": 1.33714059282334, "learning_rate": 7.0682344523921885e-06, "loss": 0.5039, "step": 4307 }, { "epoch": 0.6069742867206763, "grad_norm": 1.1266329208160382, "learning_rate": 7.063871675633609e-06, "loss": 0.933, "step": 4308 }, { "epoch": 0.6071151814019021, "grad_norm": 1.4220385793905763, "learning_rate": 7.059509510370224e-06, "loss": 0.5625, "step": 4309 }, { "epoch": 0.6072560760831278, "grad_norm": 0.9737032305717571, "learning_rate": 7.055147957510518e-06, "loss": 0.8428, "step": 4310 }, { "epoch": 0.6073969707643536, "grad_norm": 1.3226895852005158, "learning_rate": 7.050787017962863e-06, "loss": 0.5235, "step": 4311 }, { "epoch": 0.6075378654455794, "grad_norm": 1.125449773431988, "learning_rate": 7.046426692635482e-06, "loss": 0.4712, "step": 4312 }, { "epoch": 0.6076787601268052, "grad_norm": 1.0190603536919491, "learning_rate": 7.042066982436493e-06, "loss": 0.9027, "step": 4313 }, { "epoch": 0.607819654808031, "grad_norm": 1.2020678098258715, "learning_rate": 7.037707888273865e-06, "loss": 0.4855, "step": 4314 }, { "epoch": 0.6079605494892568, "grad_norm": 1.2231697812242215, "learning_rate": 7.033349411055454e-06, "loss": 0.4282, "step": 4315 }, { "epoch": 0.6081014441704826, "grad_norm": 0.9079698199213172, "learning_rate": 7.02899155168898e-06, "loss": 0.8719, "step": 4316 }, { "epoch": 0.6082423388517083, "grad_norm": 1.0839309546511562, "learning_rate": 7.0246343110820395e-06, "loss": 0.8955, "step": 4317 }, { "epoch": 0.6083832335329341, "grad_norm": 1.1418419129076947, "learning_rate": 7.02027769014209e-06, "loss": 0.9183, "step": 4318 }, { "epoch": 0.6085241282141599, "grad_norm": 0.9741089499227814, "learning_rate": 7.015921689776473e-06, "loss": 0.8697, "step": 4319 }, { "epoch": 0.6086650228953857, "grad_norm": 0.9613332000771978, "learning_rate": 7.011566310892388e-06, "loss": 0.853, "step": 4320 }, { "epoch": 0.6088059175766115, "grad_norm": 1.2685643471079555, "learning_rate": 7.00721155439692e-06, "loss": 0.4618, "step": 4321 }, { "epoch": 0.6089468122578373, "grad_norm": 1.3260329391773202, "learning_rate": 7.0028574211970105e-06, "loss": 0.5585, "step": 4322 }, { "epoch": 0.609087706939063, "grad_norm": 0.901629054855565, "learning_rate": 6.9985039121994794e-06, "loss": 0.8058, "step": 4323 }, { "epoch": 0.6092286016202888, "grad_norm": 0.9841450982330477, "learning_rate": 6.994151028311009e-06, "loss": 0.8589, "step": 4324 }, { "epoch": 0.6093694963015146, "grad_norm": 1.3028446771391053, "learning_rate": 6.989798770438163e-06, "loss": 0.4135, "step": 4325 }, { "epoch": 0.6095103909827404, "grad_norm": 1.1580244656457523, "learning_rate": 6.9854471394873625e-06, "loss": 0.9012, "step": 4326 }, { "epoch": 0.6096512856639662, "grad_norm": 1.5244211476161393, "learning_rate": 6.981096136364906e-06, "loss": 0.4619, "step": 4327 }, { "epoch": 0.609792180345192, "grad_norm": 1.1399322270270131, "learning_rate": 6.976745761976961e-06, "loss": 0.8323, "step": 4328 }, { "epoch": 0.6099330750264178, "grad_norm": 0.9772279432960336, "learning_rate": 6.972396017229556e-06, "loss": 0.826, "step": 4329 }, { "epoch": 0.6100739697076435, "grad_norm": 0.9222548962865857, "learning_rate": 6.968046903028598e-06, "loss": 0.8714, "step": 4330 }, { "epoch": 0.6102148643888693, "grad_norm": 1.0017727245573287, "learning_rate": 6.963698420279856e-06, "loss": 0.8616, "step": 4331 }, { "epoch": 0.6103557590700951, "grad_norm": 0.959820346582812, "learning_rate": 6.959350569888978e-06, "loss": 0.8719, "step": 4332 }, { "epoch": 0.6104966537513209, "grad_norm": 1.2348748551474908, "learning_rate": 6.95500335276146e-06, "loss": 0.5424, "step": 4333 }, { "epoch": 0.6106375484325467, "grad_norm": 1.3375938255165405, "learning_rate": 6.950656769802692e-06, "loss": 0.4728, "step": 4334 }, { "epoch": 0.6107784431137725, "grad_norm": 1.2832939392514193, "learning_rate": 6.94631082191791e-06, "loss": 0.5538, "step": 4335 }, { "epoch": 0.6109193377949982, "grad_norm": 1.1988533365027885, "learning_rate": 6.94196551001223e-06, "loss": 0.4726, "step": 4336 }, { "epoch": 0.611060232476224, "grad_norm": 1.1317777306889052, "learning_rate": 6.937620834990631e-06, "loss": 0.9006, "step": 4337 }, { "epoch": 0.6112011271574498, "grad_norm": 1.102395661956381, "learning_rate": 6.9332767977579596e-06, "loss": 0.4585, "step": 4338 }, { "epoch": 0.6113420218386756, "grad_norm": 1.2631049994592745, "learning_rate": 6.928933399218931e-06, "loss": 0.5042, "step": 4339 }, { "epoch": 0.6114829165199014, "grad_norm": 0.8887608784002188, "learning_rate": 6.924590640278128e-06, "loss": 0.8328, "step": 4340 }, { "epoch": 0.6116238112011272, "grad_norm": 1.2648255799124786, "learning_rate": 6.920248521839995e-06, "loss": 0.4895, "step": 4341 }, { "epoch": 0.611764705882353, "grad_norm": 1.073692801248335, "learning_rate": 6.915907044808855e-06, "loss": 0.8826, "step": 4342 }, { "epoch": 0.6119056005635787, "grad_norm": 1.3303026919896608, "learning_rate": 6.911566210088878e-06, "loss": 0.6665, "step": 4343 }, { "epoch": 0.6120464952448045, "grad_norm": 1.0496961812626537, "learning_rate": 6.907226018584121e-06, "loss": 0.8981, "step": 4344 }, { "epoch": 0.6121873899260303, "grad_norm": 0.9871404875801972, "learning_rate": 6.902886471198491e-06, "loss": 0.8822, "step": 4345 }, { "epoch": 0.6123282846072561, "grad_norm": 1.118154213178859, "learning_rate": 6.898547568835774e-06, "loss": 0.8799, "step": 4346 }, { "epoch": 0.6124691792884819, "grad_norm": 0.9628167344775108, "learning_rate": 6.894209312399609e-06, "loss": 0.8872, "step": 4347 }, { "epoch": 0.6126100739697077, "grad_norm": 1.005654396543449, "learning_rate": 6.889871702793511e-06, "loss": 0.8617, "step": 4348 }, { "epoch": 0.6127509686509335, "grad_norm": 1.2963892553895735, "learning_rate": 6.885534740920849e-06, "loss": 0.4887, "step": 4349 }, { "epoch": 0.6128918633321592, "grad_norm": 1.0152758576487646, "learning_rate": 6.881198427684877e-06, "loss": 0.8119, "step": 4350 }, { "epoch": 0.613032758013385, "grad_norm": 0.9313793346592272, "learning_rate": 6.8768627639886855e-06, "loss": 0.8623, "step": 4351 }, { "epoch": 0.6131736526946108, "grad_norm": 1.576081288976534, "learning_rate": 6.872527750735256e-06, "loss": 0.6354, "step": 4352 }, { "epoch": 0.6133145473758366, "grad_norm": 1.0124610636853266, "learning_rate": 6.868193388827418e-06, "loss": 0.8792, "step": 4353 }, { "epoch": 0.6134554420570624, "grad_norm": 1.3000950612818725, "learning_rate": 6.863859679167875e-06, "loss": 0.6059, "step": 4354 }, { "epoch": 0.6135963367382882, "grad_norm": 0.9444408633297261, "learning_rate": 6.859526622659187e-06, "loss": 0.8276, "step": 4355 }, { "epoch": 0.6137372314195139, "grad_norm": 0.9936255730384292, "learning_rate": 6.855194220203785e-06, "loss": 0.9244, "step": 4356 }, { "epoch": 0.6138781261007397, "grad_norm": 0.9786348759750436, "learning_rate": 6.8508624727039565e-06, "loss": 0.8919, "step": 4357 }, { "epoch": 0.6140190207819655, "grad_norm": 1.2319327297081493, "learning_rate": 6.8465313810618595e-06, "loss": 0.4201, "step": 4358 }, { "epoch": 0.6141599154631913, "grad_norm": 1.2153725525461267, "learning_rate": 6.842200946179507e-06, "loss": 0.5435, "step": 4359 }, { "epoch": 0.6143008101444171, "grad_norm": 1.463008660404819, "learning_rate": 6.837871168958793e-06, "loss": 0.6656, "step": 4360 }, { "epoch": 0.6144417048256429, "grad_norm": 1.5464382925345668, "learning_rate": 6.833542050301447e-06, "loss": 0.5934, "step": 4361 }, { "epoch": 0.6145825995068687, "grad_norm": 1.2211114333063042, "learning_rate": 6.829213591109087e-06, "loss": 0.4668, "step": 4362 }, { "epoch": 0.6147234941880944, "grad_norm": 0.9219696567265916, "learning_rate": 6.824885792283178e-06, "loss": 0.8044, "step": 4363 }, { "epoch": 0.6148643888693202, "grad_norm": 1.3549323124999606, "learning_rate": 6.8205586547250565e-06, "loss": 0.59, "step": 4364 }, { "epoch": 0.615005283550546, "grad_norm": 1.0183896039398317, "learning_rate": 6.816232179335913e-06, "loss": 0.844, "step": 4365 }, { "epoch": 0.6151461782317718, "grad_norm": 0.9932500741803121, "learning_rate": 6.811906367016809e-06, "loss": 0.9143, "step": 4366 }, { "epoch": 0.6152870729129976, "grad_norm": 0.8597553616413753, "learning_rate": 6.807581218668659e-06, "loss": 0.7814, "step": 4367 }, { "epoch": 0.6154279675942234, "grad_norm": 1.167661898065514, "learning_rate": 6.803256735192248e-06, "loss": 0.5465, "step": 4368 }, { "epoch": 0.615568862275449, "grad_norm": 0.964575993770377, "learning_rate": 6.798932917488215e-06, "loss": 0.8781, "step": 4369 }, { "epoch": 0.6157097569566748, "grad_norm": 0.9307436624104491, "learning_rate": 6.794609766457061e-06, "loss": 0.8418, "step": 4370 }, { "epoch": 0.6158506516379006, "grad_norm": 1.077205341177424, "learning_rate": 6.790287282999157e-06, "loss": 0.881, "step": 4371 }, { "epoch": 0.6159915463191264, "grad_norm": 1.330037934204099, "learning_rate": 6.785965468014721e-06, "loss": 0.5713, "step": 4372 }, { "epoch": 0.6161324410003522, "grad_norm": 0.9141748983543584, "learning_rate": 6.7816443224038484e-06, "loss": 0.8104, "step": 4373 }, { "epoch": 0.616273335681578, "grad_norm": 1.3899957038051174, "learning_rate": 6.77732384706648e-06, "loss": 0.5944, "step": 4374 }, { "epoch": 0.6164142303628038, "grad_norm": 1.2545755289458531, "learning_rate": 6.773004042902427e-06, "loss": 0.4583, "step": 4375 }, { "epoch": 0.6165551250440295, "grad_norm": 0.9598053987418395, "learning_rate": 6.7686849108113515e-06, "loss": 0.8488, "step": 4376 }, { "epoch": 0.6166960197252553, "grad_norm": 1.0386297920475374, "learning_rate": 6.764366451692787e-06, "loss": 0.8714, "step": 4377 }, { "epoch": 0.6168369144064811, "grad_norm": 1.5073817110167096, "learning_rate": 6.760048666446115e-06, "loss": 0.6001, "step": 4378 }, { "epoch": 0.6169778090877069, "grad_norm": 0.9720872891598501, "learning_rate": 6.755731555970589e-06, "loss": 0.8432, "step": 4379 }, { "epoch": 0.6171187037689327, "grad_norm": 1.106441619064616, "learning_rate": 6.751415121165309e-06, "loss": 0.9135, "step": 4380 }, { "epoch": 0.6172595984501585, "grad_norm": 1.3135966435173516, "learning_rate": 6.74709936292925e-06, "loss": 0.5036, "step": 4381 }, { "epoch": 0.6174004931313843, "grad_norm": 1.3634892511655752, "learning_rate": 6.742784282161225e-06, "loss": 0.5565, "step": 4382 }, { "epoch": 0.61754138781261, "grad_norm": 1.3441449987533014, "learning_rate": 6.738469879759928e-06, "loss": 0.5212, "step": 4383 }, { "epoch": 0.6176822824938358, "grad_norm": 1.1157084437268328, "learning_rate": 6.7341561566238944e-06, "loss": 0.8327, "step": 4384 }, { "epoch": 0.6178231771750616, "grad_norm": 1.3371554684467661, "learning_rate": 6.729843113651533e-06, "loss": 0.6153, "step": 4385 }, { "epoch": 0.6179640718562874, "grad_norm": 0.9359920236702438, "learning_rate": 6.725530751741096e-06, "loss": 0.891, "step": 4386 }, { "epoch": 0.6181049665375132, "grad_norm": 0.9037214779983678, "learning_rate": 6.721219071790706e-06, "loss": 0.7955, "step": 4387 }, { "epoch": 0.618245861218739, "grad_norm": 1.0963304894593338, "learning_rate": 6.716908074698334e-06, "loss": 0.8866, "step": 4388 }, { "epoch": 0.6183867558999647, "grad_norm": 0.996528408088618, "learning_rate": 6.712597761361818e-06, "loss": 0.8532, "step": 4389 }, { "epoch": 0.6185276505811905, "grad_norm": 1.0002898203936217, "learning_rate": 6.708288132678844e-06, "loss": 0.8683, "step": 4390 }, { "epoch": 0.6186685452624163, "grad_norm": 0.9765751480382382, "learning_rate": 6.7039791895469685e-06, "loss": 0.8123, "step": 4391 }, { "epoch": 0.6188094399436421, "grad_norm": 1.3305748076836652, "learning_rate": 6.699670932863585e-06, "loss": 0.5814, "step": 4392 }, { "epoch": 0.6189503346248679, "grad_norm": 0.9930482982134797, "learning_rate": 6.695363363525968e-06, "loss": 0.9043, "step": 4393 }, { "epoch": 0.6190912293060937, "grad_norm": 0.9096820463236747, "learning_rate": 6.691056482431231e-06, "loss": 0.7946, "step": 4394 }, { "epoch": 0.6192321239873195, "grad_norm": 1.3013512839427301, "learning_rate": 6.686750290476352e-06, "loss": 0.5271, "step": 4395 }, { "epoch": 0.6193730186685452, "grad_norm": 1.000931292959214, "learning_rate": 6.68244478855816e-06, "loss": 0.8536, "step": 4396 }, { "epoch": 0.619513913349771, "grad_norm": 1.0446239266248027, "learning_rate": 6.678139977573349e-06, "loss": 0.8524, "step": 4397 }, { "epoch": 0.6196548080309968, "grad_norm": 1.0966454255870266, "learning_rate": 6.67383585841846e-06, "loss": 0.8864, "step": 4398 }, { "epoch": 0.6197957027122226, "grad_norm": 1.0261880245181736, "learning_rate": 6.669532431989898e-06, "loss": 0.8639, "step": 4399 }, { "epoch": 0.6199365973934484, "grad_norm": 0.9707059453110513, "learning_rate": 6.6652296991839124e-06, "loss": 0.8546, "step": 4400 }, { "epoch": 0.6200774920746742, "grad_norm": 1.3621862693027584, "learning_rate": 6.660927660896627e-06, "loss": 0.5053, "step": 4401 }, { "epoch": 0.6202183867559, "grad_norm": 0.9661644318497682, "learning_rate": 6.6566263180239955e-06, "loss": 0.8885, "step": 4402 }, { "epoch": 0.6203592814371257, "grad_norm": 1.1487717914251105, "learning_rate": 6.6523256714618535e-06, "loss": 0.8954, "step": 4403 }, { "epoch": 0.6205001761183515, "grad_norm": 1.280545415246968, "learning_rate": 6.648025722105871e-06, "loss": 0.546, "step": 4404 }, { "epoch": 0.6206410707995773, "grad_norm": 1.412525888798955, "learning_rate": 6.643726470851583e-06, "loss": 0.6273, "step": 4405 }, { "epoch": 0.6207819654808031, "grad_norm": 1.0138617117352426, "learning_rate": 6.639427918594374e-06, "loss": 0.9083, "step": 4406 }, { "epoch": 0.6209228601620289, "grad_norm": 1.0400302894421958, "learning_rate": 6.6351300662294915e-06, "loss": 0.8607, "step": 4407 }, { "epoch": 0.6210637548432547, "grad_norm": 1.2497629502329917, "learning_rate": 6.630832914652021e-06, "loss": 0.5287, "step": 4408 }, { "epoch": 0.6212046495244804, "grad_norm": 0.9867210411356339, "learning_rate": 6.626536464756927e-06, "loss": 0.8213, "step": 4409 }, { "epoch": 0.6213455442057062, "grad_norm": 0.9322550312927379, "learning_rate": 6.622240717438998e-06, "loss": 0.8495, "step": 4410 }, { "epoch": 0.621486438886932, "grad_norm": 1.3167724146637592, "learning_rate": 6.6179456735929025e-06, "loss": 0.5743, "step": 4411 }, { "epoch": 0.6216273335681578, "grad_norm": 1.0120400130372758, "learning_rate": 6.613651334113149e-06, "loss": 0.8768, "step": 4412 }, { "epoch": 0.6217682282493836, "grad_norm": 0.9400124154363689, "learning_rate": 6.609357699894095e-06, "loss": 0.8533, "step": 4413 }, { "epoch": 0.6219091229306094, "grad_norm": 1.2771098194913781, "learning_rate": 6.605064771829965e-06, "loss": 0.5393, "step": 4414 }, { "epoch": 0.6220500176118352, "grad_norm": 0.9412583808430911, "learning_rate": 6.600772550814825e-06, "loss": 0.8877, "step": 4415 }, { "epoch": 0.6221909122930609, "grad_norm": 1.0700253194848584, "learning_rate": 6.596481037742602e-06, "loss": 0.822, "step": 4416 }, { "epoch": 0.6223318069742867, "grad_norm": 1.0435599183492954, "learning_rate": 6.592190233507068e-06, "loss": 0.8158, "step": 4417 }, { "epoch": 0.6224727016555125, "grad_norm": 1.0219111776510459, "learning_rate": 6.587900139001852e-06, "loss": 0.8548, "step": 4418 }, { "epoch": 0.6226135963367383, "grad_norm": 1.0342456698988411, "learning_rate": 6.5836107551204355e-06, "loss": 0.879, "step": 4419 }, { "epoch": 0.6227544910179641, "grad_norm": 0.9642720107052726, "learning_rate": 6.57932208275615e-06, "loss": 0.8955, "step": 4420 }, { "epoch": 0.6228953856991899, "grad_norm": 0.9846943127336415, "learning_rate": 6.575034122802174e-06, "loss": 0.8137, "step": 4421 }, { "epoch": 0.6230362803804156, "grad_norm": 1.226408460645073, "learning_rate": 6.570746876151554e-06, "loss": 0.4785, "step": 4422 }, { "epoch": 0.6231771750616414, "grad_norm": 0.9771409758997887, "learning_rate": 6.566460343697165e-06, "loss": 0.8571, "step": 4423 }, { "epoch": 0.6233180697428672, "grad_norm": 1.3138256498096774, "learning_rate": 6.562174526331755e-06, "loss": 0.5147, "step": 4424 }, { "epoch": 0.623458964424093, "grad_norm": 1.2534664772626467, "learning_rate": 6.557889424947908e-06, "loss": 0.6329, "step": 4425 }, { "epoch": 0.6235998591053188, "grad_norm": 1.352079857109694, "learning_rate": 6.553605040438066e-06, "loss": 0.53, "step": 4426 }, { "epoch": 0.6237407537865446, "grad_norm": 0.9683331639467789, "learning_rate": 6.5493213736945175e-06, "loss": 0.8651, "step": 4427 }, { "epoch": 0.6238816484677704, "grad_norm": 0.955700131144161, "learning_rate": 6.545038425609409e-06, "loss": 0.828, "step": 4428 }, { "epoch": 0.6240225431489961, "grad_norm": 1.0165391721310506, "learning_rate": 6.540756197074726e-06, "loss": 0.8466, "step": 4429 }, { "epoch": 0.6241634378302219, "grad_norm": 1.4005135309295214, "learning_rate": 6.536474688982315e-06, "loss": 0.5124, "step": 4430 }, { "epoch": 0.6243043325114477, "grad_norm": 0.913912093078157, "learning_rate": 6.532193902223862e-06, "loss": 0.8314, "step": 4431 }, { "epoch": 0.6244452271926735, "grad_norm": 0.9686361352601899, "learning_rate": 6.527913837690918e-06, "loss": 0.8536, "step": 4432 }, { "epoch": 0.6245861218738993, "grad_norm": 1.008113034427106, "learning_rate": 6.523634496274867e-06, "loss": 0.8327, "step": 4433 }, { "epoch": 0.6247270165551251, "grad_norm": 0.986109222316633, "learning_rate": 6.519355878866952e-06, "loss": 0.8778, "step": 4434 }, { "epoch": 0.6248679112363509, "grad_norm": 0.9731331025893056, "learning_rate": 6.51507798635826e-06, "loss": 0.8571, "step": 4435 }, { "epoch": 0.6250088059175766, "grad_norm": 1.216048858480525, "learning_rate": 6.510800819639735e-06, "loss": 0.5313, "step": 4436 }, { "epoch": 0.6251497005988024, "grad_norm": 1.1089350594591652, "learning_rate": 6.506524379602161e-06, "loss": 0.8821, "step": 4437 }, { "epoch": 0.6252905952800282, "grad_norm": 1.0260752119788277, "learning_rate": 6.502248667136175e-06, "loss": 0.9242, "step": 4438 }, { "epoch": 0.625431489961254, "grad_norm": 0.9658205236406765, "learning_rate": 6.497973683132259e-06, "loss": 0.8579, "step": 4439 }, { "epoch": 0.6255723846424798, "grad_norm": 0.944754568029533, "learning_rate": 6.493699428480755e-06, "loss": 0.8191, "step": 4440 }, { "epoch": 0.6257132793237056, "grad_norm": 1.0393600059762897, "learning_rate": 6.489425904071831e-06, "loss": 0.8346, "step": 4441 }, { "epoch": 0.6258541740049313, "grad_norm": 1.0690269398759256, "learning_rate": 6.485153110795528e-06, "loss": 0.8221, "step": 4442 }, { "epoch": 0.6259950686861571, "grad_norm": 1.0354129914421337, "learning_rate": 6.480881049541716e-06, "loss": 0.8328, "step": 4443 }, { "epoch": 0.6261359633673829, "grad_norm": 0.9854043505960812, "learning_rate": 6.476609721200124e-06, "loss": 0.8507, "step": 4444 }, { "epoch": 0.6262768580486087, "grad_norm": 1.426752695735619, "learning_rate": 6.472339126660319e-06, "loss": 0.557, "step": 4445 }, { "epoch": 0.6264177527298345, "grad_norm": 0.8807853364445066, "learning_rate": 6.468069266811723e-06, "loss": 0.7883, "step": 4446 }, { "epoch": 0.6265586474110603, "grad_norm": 1.0282676866244398, "learning_rate": 6.463800142543599e-06, "loss": 0.8909, "step": 4447 }, { "epoch": 0.6266995420922861, "grad_norm": 1.3413313940670528, "learning_rate": 6.459531754745065e-06, "loss": 0.4927, "step": 4448 }, { "epoch": 0.6268404367735118, "grad_norm": 1.0513271426676873, "learning_rate": 6.4552641043050725e-06, "loss": 0.8846, "step": 4449 }, { "epoch": 0.6269813314547376, "grad_norm": 1.3790623351166702, "learning_rate": 6.450997192112437e-06, "loss": 0.6271, "step": 4450 }, { "epoch": 0.6271222261359634, "grad_norm": 1.340084506026714, "learning_rate": 6.446731019055799e-06, "loss": 0.5489, "step": 4451 }, { "epoch": 0.6272631208171892, "grad_norm": 1.0389206424990147, "learning_rate": 6.442465586023667e-06, "loss": 0.8625, "step": 4452 }, { "epoch": 0.627404015498415, "grad_norm": 1.0660730538450802, "learning_rate": 6.438200893904382e-06, "loss": 0.8466, "step": 4453 }, { "epoch": 0.6275449101796408, "grad_norm": 1.0454721251531574, "learning_rate": 6.4339369435861275e-06, "loss": 0.8846, "step": 4454 }, { "epoch": 0.6276858048608664, "grad_norm": 1.0604935959526791, "learning_rate": 6.429673735956947e-06, "loss": 0.8411, "step": 4455 }, { "epoch": 0.6278266995420922, "grad_norm": 1.2801829936166298, "learning_rate": 6.425411271904715e-06, "loss": 0.5084, "step": 4456 }, { "epoch": 0.627967594223318, "grad_norm": 1.252388832380457, "learning_rate": 6.421149552317163e-06, "loss": 0.5628, "step": 4457 }, { "epoch": 0.6281084889045438, "grad_norm": 1.1803831457309508, "learning_rate": 6.416888578081856e-06, "loss": 0.5004, "step": 4458 }, { "epoch": 0.6282493835857696, "grad_norm": 0.9897942048725252, "learning_rate": 6.412628350086212e-06, "loss": 0.8365, "step": 4459 }, { "epoch": 0.6283902782669955, "grad_norm": 1.1220352197003756, "learning_rate": 6.4083688692174875e-06, "loss": 0.8578, "step": 4460 }, { "epoch": 0.6285311729482213, "grad_norm": 1.2922790127133166, "learning_rate": 6.404110136362793e-06, "loss": 0.5576, "step": 4461 }, { "epoch": 0.6286720676294469, "grad_norm": 1.5336134103547814, "learning_rate": 6.39985215240907e-06, "loss": 0.5907, "step": 4462 }, { "epoch": 0.6288129623106727, "grad_norm": 1.021163574842507, "learning_rate": 6.395594918243118e-06, "loss": 0.8778, "step": 4463 }, { "epoch": 0.6289538569918985, "grad_norm": 0.9453045063488386, "learning_rate": 6.3913384347515685e-06, "loss": 0.8328, "step": 4464 }, { "epoch": 0.6290947516731243, "grad_norm": 0.9560496918371482, "learning_rate": 6.387082702820905e-06, "loss": 0.9031, "step": 4465 }, { "epoch": 0.6292356463543501, "grad_norm": 0.9898430408215653, "learning_rate": 6.382827723337446e-06, "loss": 0.846, "step": 4466 }, { "epoch": 0.6293765410355759, "grad_norm": 0.9560146373419819, "learning_rate": 6.378573497187365e-06, "loss": 0.8384, "step": 4467 }, { "epoch": 0.6295174357168017, "grad_norm": 0.9692663719816899, "learning_rate": 6.3743200252566665e-06, "loss": 0.8702, "step": 4468 }, { "epoch": 0.6296583303980274, "grad_norm": 1.343893790956638, "learning_rate": 6.370067308431205e-06, "loss": 0.5588, "step": 4469 }, { "epoch": 0.6297992250792532, "grad_norm": 0.9170960203783861, "learning_rate": 6.365815347596674e-06, "loss": 0.8488, "step": 4470 }, { "epoch": 0.629940119760479, "grad_norm": 1.0641013118379683, "learning_rate": 6.361564143638619e-06, "loss": 0.896, "step": 4471 }, { "epoch": 0.6300810144417048, "grad_norm": 1.102788431082465, "learning_rate": 6.357313697442411e-06, "loss": 0.8479, "step": 4472 }, { "epoch": 0.6302219091229306, "grad_norm": 1.2962241335825386, "learning_rate": 6.35306400989328e-06, "loss": 0.4714, "step": 4473 }, { "epoch": 0.6303628038041564, "grad_norm": 0.8391611551984806, "learning_rate": 6.348815081876285e-06, "loss": 0.8198, "step": 4474 }, { "epoch": 0.6305036984853821, "grad_norm": 0.9529306181471662, "learning_rate": 6.344566914276339e-06, "loss": 0.8082, "step": 4475 }, { "epoch": 0.6306445931666079, "grad_norm": 1.2225767751625078, "learning_rate": 6.340319507978183e-06, "loss": 0.5797, "step": 4476 }, { "epoch": 0.6307854878478337, "grad_norm": 1.0619343189207977, "learning_rate": 6.336072863866414e-06, "loss": 0.9257, "step": 4477 }, { "epoch": 0.6309263825290595, "grad_norm": 0.9738130058886251, "learning_rate": 6.3318269828254556e-06, "loss": 0.8457, "step": 4478 }, { "epoch": 0.6310672772102853, "grad_norm": 1.3415021597896837, "learning_rate": 6.327581865739585e-06, "loss": 0.5654, "step": 4479 }, { "epoch": 0.6312081718915111, "grad_norm": 1.0932566769661392, "learning_rate": 6.323337513492909e-06, "loss": 0.8789, "step": 4480 }, { "epoch": 0.6313490665727369, "grad_norm": 0.9795817128386831, "learning_rate": 6.3190939269693915e-06, "loss": 0.8538, "step": 4481 }, { "epoch": 0.6314899612539626, "grad_norm": 0.9528596780467696, "learning_rate": 6.314851107052815e-06, "loss": 0.8523, "step": 4482 }, { "epoch": 0.6316308559351884, "grad_norm": 1.0699762941031554, "learning_rate": 6.3106090546268215e-06, "loss": 0.8755, "step": 4483 }, { "epoch": 0.6317717506164142, "grad_norm": 0.9537426616543366, "learning_rate": 6.306367770574883e-06, "loss": 0.8709, "step": 4484 }, { "epoch": 0.63191264529764, "grad_norm": 1.2247765573375502, "learning_rate": 6.302127255780315e-06, "loss": 0.6243, "step": 4485 }, { "epoch": 0.6320535399788658, "grad_norm": 1.0170978245544942, "learning_rate": 6.2978875111262685e-06, "loss": 0.8957, "step": 4486 }, { "epoch": 0.6321944346600916, "grad_norm": 0.903194502460463, "learning_rate": 6.293648537495743e-06, "loss": 0.8663, "step": 4487 }, { "epoch": 0.6323353293413174, "grad_norm": 0.9822378490315473, "learning_rate": 6.289410335771567e-06, "loss": 0.8424, "step": 4488 }, { "epoch": 0.6324762240225431, "grad_norm": 0.9521086106464811, "learning_rate": 6.285172906836416e-06, "loss": 0.8657, "step": 4489 }, { "epoch": 0.6326171187037689, "grad_norm": 1.0923304913895346, "learning_rate": 6.280936251572798e-06, "loss": 0.4838, "step": 4490 }, { "epoch": 0.6327580133849947, "grad_norm": 0.9802887925615458, "learning_rate": 6.276700370863068e-06, "loss": 0.8817, "step": 4491 }, { "epoch": 0.6328989080662205, "grad_norm": 1.186303218357252, "learning_rate": 6.272465265589412e-06, "loss": 0.4691, "step": 4492 }, { "epoch": 0.6330398027474463, "grad_norm": 1.0636864785565758, "learning_rate": 6.268230936633861e-06, "loss": 0.8418, "step": 4493 }, { "epoch": 0.6331806974286721, "grad_norm": 1.0472035549922682, "learning_rate": 6.263997384878278e-06, "loss": 0.8184, "step": 4494 }, { "epoch": 0.6333215921098978, "grad_norm": 1.1485463969579832, "learning_rate": 6.259764611204369e-06, "loss": 0.8377, "step": 4495 }, { "epoch": 0.6334624867911236, "grad_norm": 0.9831554519335999, "learning_rate": 6.255532616493676e-06, "loss": 0.9016, "step": 4496 }, { "epoch": 0.6336033814723494, "grad_norm": 0.9084283680950415, "learning_rate": 6.251301401627575e-06, "loss": 0.8859, "step": 4497 }, { "epoch": 0.6337442761535752, "grad_norm": 1.0509358098755812, "learning_rate": 6.24707096748729e-06, "loss": 0.4643, "step": 4498 }, { "epoch": 0.633885170834801, "grad_norm": 1.1689793268018942, "learning_rate": 6.2428413149538695e-06, "loss": 0.4573, "step": 4499 }, { "epoch": 0.6340260655160268, "grad_norm": 0.9973899036842058, "learning_rate": 6.23861244490821e-06, "loss": 0.8484, "step": 4500 }, { "epoch": 0.6341669601972526, "grad_norm": 1.2865222331804997, "learning_rate": 6.234384358231036e-06, "loss": 0.5348, "step": 4501 }, { "epoch": 0.6343078548784783, "grad_norm": 0.8948432750235861, "learning_rate": 6.230157055802923e-06, "loss": 0.8418, "step": 4502 }, { "epoch": 0.6344487495597041, "grad_norm": 1.1881327064709821, "learning_rate": 6.22593053850426e-06, "loss": 0.4834, "step": 4503 }, { "epoch": 0.6345896442409299, "grad_norm": 1.1066297239702612, "learning_rate": 6.221704807215298e-06, "loss": 0.4815, "step": 4504 }, { "epoch": 0.6347305389221557, "grad_norm": 0.936777781427783, "learning_rate": 6.217479862816105e-06, "loss": 0.8082, "step": 4505 }, { "epoch": 0.6348714336033815, "grad_norm": 0.9729345197336836, "learning_rate": 6.213255706186598e-06, "loss": 0.9019, "step": 4506 }, { "epoch": 0.6350123282846073, "grad_norm": 1.247163537725529, "learning_rate": 6.20903233820652e-06, "loss": 0.5723, "step": 4507 }, { "epoch": 0.635153222965833, "grad_norm": 1.1989791390977644, "learning_rate": 6.204809759755458e-06, "loss": 0.4728, "step": 4508 }, { "epoch": 0.6352941176470588, "grad_norm": 1.1702704499765737, "learning_rate": 6.200587971712827e-06, "loss": 0.8683, "step": 4509 }, { "epoch": 0.6354350123282846, "grad_norm": 1.4233360178758812, "learning_rate": 6.196366974957884e-06, "loss": 0.5183, "step": 4510 }, { "epoch": 0.6355759070095104, "grad_norm": 1.2427384789034024, "learning_rate": 6.192146770369713e-06, "loss": 0.4703, "step": 4511 }, { "epoch": 0.6357168016907362, "grad_norm": 0.99211829784823, "learning_rate": 6.1879273588272505e-06, "loss": 0.834, "step": 4512 }, { "epoch": 0.635857696371962, "grad_norm": 0.9421607789060865, "learning_rate": 6.1837087412092426e-06, "loss": 0.8314, "step": 4513 }, { "epoch": 0.6359985910531878, "grad_norm": 0.9937984421796411, "learning_rate": 6.179490918394291e-06, "loss": 0.8477, "step": 4514 }, { "epoch": 0.6361394857344135, "grad_norm": 0.8990787456829848, "learning_rate": 6.175273891260819e-06, "loss": 0.8267, "step": 4515 }, { "epoch": 0.6362803804156393, "grad_norm": 1.1549358458661636, "learning_rate": 6.171057660687094e-06, "loss": 0.6763, "step": 4516 }, { "epoch": 0.6364212750968651, "grad_norm": 1.0014518806436827, "learning_rate": 6.16684222755121e-06, "loss": 0.8842, "step": 4517 }, { "epoch": 0.6365621697780909, "grad_norm": 1.011496073925008, "learning_rate": 6.162627592731099e-06, "loss": 0.9061, "step": 4518 }, { "epoch": 0.6367030644593167, "grad_norm": 0.9944686258337391, "learning_rate": 6.158413757104521e-06, "loss": 0.8427, "step": 4519 }, { "epoch": 0.6368439591405425, "grad_norm": 1.098129653412916, "learning_rate": 6.154200721549084e-06, "loss": 0.8071, "step": 4520 }, { "epoch": 0.6369848538217683, "grad_norm": 1.0627038615916429, "learning_rate": 6.149988486942207e-06, "loss": 0.8177, "step": 4521 }, { "epoch": 0.637125748502994, "grad_norm": 1.068710480575502, "learning_rate": 6.145777054161164e-06, "loss": 0.8817, "step": 4522 }, { "epoch": 0.6372666431842198, "grad_norm": 1.1180338509335144, "learning_rate": 6.141566424083047e-06, "loss": 0.8505, "step": 4523 }, { "epoch": 0.6374075378654456, "grad_norm": 1.3404690415443854, "learning_rate": 6.1373565975847914e-06, "loss": 0.5544, "step": 4524 }, { "epoch": 0.6375484325466714, "grad_norm": 1.1312625950570276, "learning_rate": 6.133147575543155e-06, "loss": 0.8724, "step": 4525 }, { "epoch": 0.6376893272278972, "grad_norm": 1.12335302889892, "learning_rate": 6.1289393588347354e-06, "loss": 0.8147, "step": 4526 }, { "epoch": 0.637830221909123, "grad_norm": 1.33705562658983, "learning_rate": 6.12473194833596e-06, "loss": 0.535, "step": 4527 }, { "epoch": 0.6379711165903487, "grad_norm": 0.9262373427587742, "learning_rate": 6.120525344923092e-06, "loss": 0.8416, "step": 4528 }, { "epoch": 0.6381120112715745, "grad_norm": 0.8453924976139157, "learning_rate": 6.116319549472214e-06, "loss": 0.8224, "step": 4529 }, { "epoch": 0.6382529059528003, "grad_norm": 1.0855260453131104, "learning_rate": 6.112114562859264e-06, "loss": 0.8572, "step": 4530 }, { "epoch": 0.6383938006340261, "grad_norm": 1.2351603023280415, "learning_rate": 6.107910385959981e-06, "loss": 0.5303, "step": 4531 }, { "epoch": 0.6385346953152519, "grad_norm": 1.2363057159385464, "learning_rate": 6.103707019649963e-06, "loss": 0.487, "step": 4532 }, { "epoch": 0.6386755899964777, "grad_norm": 1.0065312588249795, "learning_rate": 6.099504464804621e-06, "loss": 0.8572, "step": 4533 }, { "epoch": 0.6388164846777035, "grad_norm": 1.0680722322028104, "learning_rate": 6.095302722299209e-06, "loss": 0.8476, "step": 4534 }, { "epoch": 0.6389573793589292, "grad_norm": 1.0169750334117367, "learning_rate": 6.0911017930088016e-06, "loss": 0.7683, "step": 4535 }, { "epoch": 0.639098274040155, "grad_norm": 1.1961663094716095, "learning_rate": 6.086901677808313e-06, "loss": 0.5142, "step": 4536 }, { "epoch": 0.6392391687213808, "grad_norm": 1.0404694217742425, "learning_rate": 6.082702377572481e-06, "loss": 0.8622, "step": 4537 }, { "epoch": 0.6393800634026066, "grad_norm": 1.111381833496291, "learning_rate": 6.078503893175874e-06, "loss": 0.8326, "step": 4538 }, { "epoch": 0.6395209580838324, "grad_norm": 0.9373552331088572, "learning_rate": 6.0743062254928966e-06, "loss": 0.8682, "step": 4539 }, { "epoch": 0.6396618527650582, "grad_norm": 1.2524568172656896, "learning_rate": 6.070109375397778e-06, "loss": 0.4799, "step": 4540 }, { "epoch": 0.6398027474462838, "grad_norm": 1.0669098178341077, "learning_rate": 6.0659133437645815e-06, "loss": 0.8685, "step": 4541 }, { "epoch": 0.6399436421275096, "grad_norm": 1.0516398778701743, "learning_rate": 6.061718131467189e-06, "loss": 0.8598, "step": 4542 }, { "epoch": 0.6400845368087354, "grad_norm": 1.120817422825652, "learning_rate": 6.057523739379332e-06, "loss": 0.9434, "step": 4543 }, { "epoch": 0.6402254314899612, "grad_norm": 0.9935915250101504, "learning_rate": 6.053330168374548e-06, "loss": 0.8581, "step": 4544 }, { "epoch": 0.640366326171187, "grad_norm": 1.3723178638829807, "learning_rate": 6.0491374193262234e-06, "loss": 0.5079, "step": 4545 }, { "epoch": 0.6405072208524129, "grad_norm": 1.1710943658348725, "learning_rate": 6.044945493107557e-06, "loss": 0.451, "step": 4546 }, { "epoch": 0.6406481155336387, "grad_norm": 0.983008332891314, "learning_rate": 6.040754390591589e-06, "loss": 0.8747, "step": 4547 }, { "epoch": 0.6407890102148643, "grad_norm": 1.186918861306155, "learning_rate": 6.03656411265118e-06, "loss": 0.4891, "step": 4548 }, { "epoch": 0.6409299048960901, "grad_norm": 1.1161167927239701, "learning_rate": 6.032374660159023e-06, "loss": 0.8732, "step": 4549 }, { "epoch": 0.6410707995773159, "grad_norm": 1.0225456560412538, "learning_rate": 6.028186033987633e-06, "loss": 0.8577, "step": 4550 }, { "epoch": 0.6412116942585417, "grad_norm": 1.1214812750757395, "learning_rate": 6.02399823500937e-06, "loss": 0.4723, "step": 4551 }, { "epoch": 0.6413525889397675, "grad_norm": 1.372154916800752, "learning_rate": 6.019811264096395e-06, "loss": 0.4135, "step": 4552 }, { "epoch": 0.6414934836209933, "grad_norm": 0.9227628908496038, "learning_rate": 6.015625122120721e-06, "loss": 0.8317, "step": 4553 }, { "epoch": 0.6416343783022191, "grad_norm": 1.07631191481027, "learning_rate": 6.011439809954171e-06, "loss": 0.8522, "step": 4554 }, { "epoch": 0.6417752729834448, "grad_norm": 1.0485994552313538, "learning_rate": 6.007255328468411e-06, "loss": 0.851, "step": 4555 }, { "epoch": 0.6419161676646706, "grad_norm": 1.2176983173125981, "learning_rate": 6.0030716785349175e-06, "loss": 0.53, "step": 4556 }, { "epoch": 0.6420570623458964, "grad_norm": 0.9824194093804157, "learning_rate": 5.998888861025007e-06, "loss": 0.8546, "step": 4557 }, { "epoch": 0.6421979570271222, "grad_norm": 0.94078182880792, "learning_rate": 5.994706876809815e-06, "loss": 0.8203, "step": 4558 }, { "epoch": 0.642338851708348, "grad_norm": 1.003352222125151, "learning_rate": 5.990525726760307e-06, "loss": 0.8728, "step": 4559 }, { "epoch": 0.6424797463895738, "grad_norm": 1.2185373827075756, "learning_rate": 5.98634541174727e-06, "loss": 0.4495, "step": 4560 }, { "epoch": 0.6426206410707995, "grad_norm": 1.5855886798907908, "learning_rate": 5.982165932641328e-06, "loss": 0.5865, "step": 4561 }, { "epoch": 0.6427615357520253, "grad_norm": 1.3177021765289458, "learning_rate": 5.977987290312915e-06, "loss": 0.4794, "step": 4562 }, { "epoch": 0.6429024304332511, "grad_norm": 0.9597007719043431, "learning_rate": 5.973809485632307e-06, "loss": 0.8298, "step": 4563 }, { "epoch": 0.6430433251144769, "grad_norm": 1.007678475958205, "learning_rate": 5.969632519469594e-06, "loss": 0.8651, "step": 4564 }, { "epoch": 0.6431842197957027, "grad_norm": 0.9715179260086152, "learning_rate": 5.9654563926946976e-06, "loss": 0.8434, "step": 4565 }, { "epoch": 0.6433251144769285, "grad_norm": 1.1323715415537772, "learning_rate": 5.961281106177359e-06, "loss": 0.4716, "step": 4566 }, { "epoch": 0.6434660091581543, "grad_norm": 1.060301211008607, "learning_rate": 5.957106660787151e-06, "loss": 0.7855, "step": 4567 }, { "epoch": 0.64360690383938, "grad_norm": 1.0064308678969105, "learning_rate": 5.952933057393464e-06, "loss": 0.8857, "step": 4568 }, { "epoch": 0.6437477985206058, "grad_norm": 1.0132564917480862, "learning_rate": 5.948760296865523e-06, "loss": 0.9087, "step": 4569 }, { "epoch": 0.6438886932018316, "grad_norm": 0.8446372037705562, "learning_rate": 5.9445883800723624e-06, "loss": 0.7963, "step": 4570 }, { "epoch": 0.6440295878830574, "grad_norm": 1.2717669197098738, "learning_rate": 5.940417307882862e-06, "loss": 0.4769, "step": 4571 }, { "epoch": 0.6441704825642832, "grad_norm": 1.0628148879160895, "learning_rate": 5.9362470811657025e-06, "loss": 0.86, "step": 4572 }, { "epoch": 0.644311377245509, "grad_norm": 0.9950187601392015, "learning_rate": 5.932077700789406e-06, "loss": 0.8079, "step": 4573 }, { "epoch": 0.6444522719267348, "grad_norm": 1.2752044140278413, "learning_rate": 5.927909167622308e-06, "loss": 0.496, "step": 4574 }, { "epoch": 0.6445931666079605, "grad_norm": 0.9439223691841885, "learning_rate": 5.923741482532577e-06, "loss": 0.8605, "step": 4575 }, { "epoch": 0.6447340612891863, "grad_norm": 0.9603530513015934, "learning_rate": 5.919574646388193e-06, "loss": 0.7715, "step": 4576 }, { "epoch": 0.6448749559704121, "grad_norm": 1.266242474635645, "learning_rate": 5.9154086600569695e-06, "loss": 0.4621, "step": 4577 }, { "epoch": 0.6450158506516379, "grad_norm": 1.3158863501638265, "learning_rate": 5.911243524406536e-06, "loss": 0.4746, "step": 4578 }, { "epoch": 0.6451567453328637, "grad_norm": 1.0154740615047027, "learning_rate": 5.907079240304354e-06, "loss": 0.8112, "step": 4579 }, { "epoch": 0.6452976400140895, "grad_norm": 1.2437956299466268, "learning_rate": 5.9029158086176975e-06, "loss": 0.4871, "step": 4580 }, { "epoch": 0.6454385346953152, "grad_norm": 1.2308874963418464, "learning_rate": 5.898753230213662e-06, "loss": 0.4643, "step": 4581 }, { "epoch": 0.645579429376541, "grad_norm": 1.342640513900738, "learning_rate": 5.894591505959182e-06, "loss": 0.5896, "step": 4582 }, { "epoch": 0.6457203240577668, "grad_norm": 1.331000374292781, "learning_rate": 5.8904306367209895e-06, "loss": 0.5786, "step": 4583 }, { "epoch": 0.6458612187389926, "grad_norm": 1.2349222980374048, "learning_rate": 5.886270623365662e-06, "loss": 0.4959, "step": 4584 }, { "epoch": 0.6460021134202184, "grad_norm": 1.0287448142080269, "learning_rate": 5.882111466759582e-06, "loss": 0.8402, "step": 4585 }, { "epoch": 0.6461430081014442, "grad_norm": 1.2956639180903051, "learning_rate": 5.8779531677689614e-06, "loss": 0.4555, "step": 4586 }, { "epoch": 0.64628390278267, "grad_norm": 1.036175983259875, "learning_rate": 5.873795727259832e-06, "loss": 0.8868, "step": 4587 }, { "epoch": 0.6464247974638957, "grad_norm": 0.8850642257199093, "learning_rate": 5.869639146098049e-06, "loss": 0.8469, "step": 4588 }, { "epoch": 0.6465656921451215, "grad_norm": 1.2232546637616715, "learning_rate": 5.8654834251492806e-06, "loss": 0.5126, "step": 4589 }, { "epoch": 0.6467065868263473, "grad_norm": 1.284262571707939, "learning_rate": 5.861328565279027e-06, "loss": 0.563, "step": 4590 }, { "epoch": 0.6468474815075731, "grad_norm": 1.0573888141243417, "learning_rate": 5.857174567352597e-06, "loss": 0.8353, "step": 4591 }, { "epoch": 0.6469883761887989, "grad_norm": 1.0057128328138567, "learning_rate": 5.8530214322351376e-06, "loss": 0.8087, "step": 4592 }, { "epoch": 0.6471292708700247, "grad_norm": 0.9882719920048846, "learning_rate": 5.84886916079159e-06, "loss": 0.8808, "step": 4593 }, { "epoch": 0.6472701655512504, "grad_norm": 1.0932586603339227, "learning_rate": 5.844717753886744e-06, "loss": 0.9002, "step": 4594 }, { "epoch": 0.6474110602324762, "grad_norm": 1.0294715207775185, "learning_rate": 5.840567212385186e-06, "loss": 0.9391, "step": 4595 }, { "epoch": 0.647551954913702, "grad_norm": 0.9533377766101501, "learning_rate": 5.836417537151342e-06, "loss": 0.8786, "step": 4596 }, { "epoch": 0.6476928495949278, "grad_norm": 1.1999262279254275, "learning_rate": 5.832268729049435e-06, "loss": 0.8351, "step": 4597 }, { "epoch": 0.6478337442761536, "grad_norm": 1.0511900527044833, "learning_rate": 5.82812078894353e-06, "loss": 0.8817, "step": 4598 }, { "epoch": 0.6479746389573794, "grad_norm": 1.0046879926186056, "learning_rate": 5.823973717697494e-06, "loss": 0.8711, "step": 4599 }, { "epoch": 0.6481155336386052, "grad_norm": 1.1188524992166742, "learning_rate": 5.8198275161750275e-06, "loss": 0.8964, "step": 4600 }, { "epoch": 0.6482564283198309, "grad_norm": 1.3139684496869397, "learning_rate": 5.815682185239639e-06, "loss": 0.5618, "step": 4601 }, { "epoch": 0.6483973230010567, "grad_norm": 1.1816885890545679, "learning_rate": 5.811537725754661e-06, "loss": 0.4476, "step": 4602 }, { "epoch": 0.6485382176822825, "grad_norm": 1.0777574836561117, "learning_rate": 5.807394138583236e-06, "loss": 0.855, "step": 4603 }, { "epoch": 0.6486791123635083, "grad_norm": 1.0699052035637366, "learning_rate": 5.8032514245883406e-06, "loss": 0.9041, "step": 4604 }, { "epoch": 0.6488200070447341, "grad_norm": 0.9835556404370269, "learning_rate": 5.7991095846327515e-06, "loss": 0.8979, "step": 4605 }, { "epoch": 0.6489609017259599, "grad_norm": 1.2060561853553853, "learning_rate": 5.794968619579087e-06, "loss": 0.4828, "step": 4606 }, { "epoch": 0.6491017964071857, "grad_norm": 1.108790394502211, "learning_rate": 5.790828530289752e-06, "loss": 0.808, "step": 4607 }, { "epoch": 0.6492426910884114, "grad_norm": 0.9940964492214819, "learning_rate": 5.786689317626994e-06, "loss": 0.8175, "step": 4608 }, { "epoch": 0.6493835857696372, "grad_norm": 0.974308680407494, "learning_rate": 5.782550982452866e-06, "loss": 0.8214, "step": 4609 }, { "epoch": 0.649524480450863, "grad_norm": 1.1094075781601664, "learning_rate": 5.778413525629248e-06, "loss": 0.8596, "step": 4610 }, { "epoch": 0.6496653751320888, "grad_norm": 1.1406096269819568, "learning_rate": 5.774276948017827e-06, "loss": 0.5086, "step": 4611 }, { "epoch": 0.6498062698133146, "grad_norm": 1.056911121657256, "learning_rate": 5.770141250480111e-06, "loss": 0.4213, "step": 4612 }, { "epoch": 0.6499471644945404, "grad_norm": 1.3764058027916424, "learning_rate": 5.7660064338774206e-06, "loss": 0.4912, "step": 4613 }, { "epoch": 0.6500880591757661, "grad_norm": 0.9997054118313923, "learning_rate": 5.761872499070903e-06, "loss": 0.8458, "step": 4614 }, { "epoch": 0.6502289538569919, "grad_norm": 0.993388622548744, "learning_rate": 5.75773944692151e-06, "loss": 0.835, "step": 4615 }, { "epoch": 0.6503698485382177, "grad_norm": 1.0024729419377536, "learning_rate": 5.753607278290021e-06, "loss": 0.8392, "step": 4616 }, { "epoch": 0.6505107432194435, "grad_norm": 1.10972716393551, "learning_rate": 5.7494759940370235e-06, "loss": 0.8995, "step": 4617 }, { "epoch": 0.6506516379006693, "grad_norm": 0.9971801263712878, "learning_rate": 5.745345595022922e-06, "loss": 0.8711, "step": 4618 }, { "epoch": 0.6507925325818951, "grad_norm": 1.109851470845484, "learning_rate": 5.741216082107935e-06, "loss": 0.906, "step": 4619 }, { "epoch": 0.6509334272631209, "grad_norm": 1.2426662564620756, "learning_rate": 5.737087456152102e-06, "loss": 0.464, "step": 4620 }, { "epoch": 0.6510743219443466, "grad_norm": 1.0021221972195165, "learning_rate": 5.732959718015276e-06, "loss": 0.7885, "step": 4621 }, { "epoch": 0.6512152166255724, "grad_norm": 0.9941502847928511, "learning_rate": 5.728832868557122e-06, "loss": 0.8397, "step": 4622 }, { "epoch": 0.6513561113067982, "grad_norm": 0.9942581010790575, "learning_rate": 5.72470690863712e-06, "loss": 0.8884, "step": 4623 }, { "epoch": 0.651497005988024, "grad_norm": 1.045300090329274, "learning_rate": 5.720581839114565e-06, "loss": 0.8985, "step": 4624 }, { "epoch": 0.6516379006692498, "grad_norm": 0.9786660291757706, "learning_rate": 5.716457660848574e-06, "loss": 0.8615, "step": 4625 }, { "epoch": 0.6517787953504756, "grad_norm": 1.0009164819446708, "learning_rate": 5.712334374698068e-06, "loss": 0.8687, "step": 4626 }, { "epoch": 0.6519196900317012, "grad_norm": 1.2335580754047222, "learning_rate": 5.708211981521794e-06, "loss": 0.4779, "step": 4627 }, { "epoch": 0.652060584712927, "grad_norm": 1.1000216615947258, "learning_rate": 5.70409048217829e-06, "loss": 0.5052, "step": 4628 }, { "epoch": 0.6522014793941528, "grad_norm": 0.9651185412567145, "learning_rate": 5.699969877525939e-06, "loss": 0.816, "step": 4629 }, { "epoch": 0.6523423740753787, "grad_norm": 1.0444255017320387, "learning_rate": 5.695850168422913e-06, "loss": 0.884, "step": 4630 }, { "epoch": 0.6524832687566045, "grad_norm": 0.9757490505823484, "learning_rate": 5.691731355727212e-06, "loss": 0.8219, "step": 4631 }, { "epoch": 0.6526241634378303, "grad_norm": 0.9944954314272515, "learning_rate": 5.6876134402966445e-06, "loss": 0.862, "step": 4632 }, { "epoch": 0.652765058119056, "grad_norm": 1.0428048133721997, "learning_rate": 5.683496422988825e-06, "loss": 0.8393, "step": 4633 }, { "epoch": 0.6529059528002817, "grad_norm": 0.9375016549548535, "learning_rate": 5.6793803046611904e-06, "loss": 0.8751, "step": 4634 }, { "epoch": 0.6530468474815075, "grad_norm": 1.2700765341984848, "learning_rate": 5.675265086170991e-06, "loss": 0.4057, "step": 4635 }, { "epoch": 0.6531877421627333, "grad_norm": 1.273128743837434, "learning_rate": 5.67115076837528e-06, "loss": 0.8077, "step": 4636 }, { "epoch": 0.6533286368439591, "grad_norm": 0.9849593764083395, "learning_rate": 5.667037352130942e-06, "loss": 0.7981, "step": 4637 }, { "epoch": 0.6534695315251849, "grad_norm": 1.0144482308984908, "learning_rate": 5.662924838294641e-06, "loss": 0.8818, "step": 4638 }, { "epoch": 0.6536104262064107, "grad_norm": 1.140445683622501, "learning_rate": 5.65881322772289e-06, "loss": 0.4955, "step": 4639 }, { "epoch": 0.6537513208876365, "grad_norm": 1.12056042720214, "learning_rate": 5.654702521271986e-06, "loss": 0.8395, "step": 4640 }, { "epoch": 0.6538922155688622, "grad_norm": 0.9948744881010284, "learning_rate": 5.650592719798059e-06, "loss": 0.8912, "step": 4641 }, { "epoch": 0.654033110250088, "grad_norm": 0.935972023721886, "learning_rate": 5.646483824157034e-06, "loss": 0.8135, "step": 4642 }, { "epoch": 0.6541740049313138, "grad_norm": 1.21489350022081, "learning_rate": 5.642375835204654e-06, "loss": 0.5636, "step": 4643 }, { "epoch": 0.6543148996125396, "grad_norm": 1.7153588446224601, "learning_rate": 5.63826875379647e-06, "loss": 0.5172, "step": 4644 }, { "epoch": 0.6544557942937654, "grad_norm": 1.0521842412944948, "learning_rate": 5.634162580787852e-06, "loss": 0.869, "step": 4645 }, { "epoch": 0.6545966889749912, "grad_norm": 0.8999119680242361, "learning_rate": 5.63005731703397e-06, "loss": 0.8381, "step": 4646 }, { "epoch": 0.6547375836562169, "grad_norm": 1.039713192307223, "learning_rate": 5.625952963389818e-06, "loss": 0.9202, "step": 4647 }, { "epoch": 0.6548784783374427, "grad_norm": 1.2781172136525578, "learning_rate": 5.621849520710187e-06, "loss": 0.9296, "step": 4648 }, { "epoch": 0.6550193730186685, "grad_norm": 1.334762092449655, "learning_rate": 5.617746989849686e-06, "loss": 0.5417, "step": 4649 }, { "epoch": 0.6551602676998943, "grad_norm": 0.9564809125647236, "learning_rate": 5.613645371662726e-06, "loss": 0.8544, "step": 4650 }, { "epoch": 0.6553011623811201, "grad_norm": 1.1804353308503723, "learning_rate": 5.609544667003544e-06, "loss": 0.4799, "step": 4651 }, { "epoch": 0.6554420570623459, "grad_norm": 1.323151473499922, "learning_rate": 5.60544487672617e-06, "loss": 0.5628, "step": 4652 }, { "epoch": 0.6555829517435717, "grad_norm": 1.0139525504298974, "learning_rate": 5.601346001684454e-06, "loss": 0.8249, "step": 4653 }, { "epoch": 0.6557238464247974, "grad_norm": 0.9568532423093681, "learning_rate": 5.597248042732042e-06, "loss": 0.8364, "step": 4654 }, { "epoch": 0.6558647411060232, "grad_norm": 1.01124189059246, "learning_rate": 5.593151000722412e-06, "loss": 0.8333, "step": 4655 }, { "epoch": 0.656005635787249, "grad_norm": 1.0367169868990755, "learning_rate": 5.5890548765088286e-06, "loss": 0.8541, "step": 4656 }, { "epoch": 0.6561465304684748, "grad_norm": 0.9869392201791953, "learning_rate": 5.58495967094438e-06, "loss": 0.8391, "step": 4657 }, { "epoch": 0.6562874251497006, "grad_norm": 1.1702907571284968, "learning_rate": 5.580865384881955e-06, "loss": 0.8508, "step": 4658 }, { "epoch": 0.6564283198309264, "grad_norm": 1.0547631766446417, "learning_rate": 5.576772019174256e-06, "loss": 0.7911, "step": 4659 }, { "epoch": 0.6565692145121522, "grad_norm": 1.1113460338388867, "learning_rate": 5.572679574673782e-06, "loss": 0.9029, "step": 4660 }, { "epoch": 0.6567101091933779, "grad_norm": 0.9704239594939852, "learning_rate": 5.5685880522328635e-06, "loss": 0.8757, "step": 4661 }, { "epoch": 0.6568510038746037, "grad_norm": 1.1374268934016831, "learning_rate": 5.564497452703617e-06, "loss": 0.8556, "step": 4662 }, { "epoch": 0.6569918985558295, "grad_norm": 1.1341835217732257, "learning_rate": 5.560407776937975e-06, "loss": 0.8853, "step": 4663 }, { "epoch": 0.6571327932370553, "grad_norm": 0.9770155384188536, "learning_rate": 5.556319025787677e-06, "loss": 0.8253, "step": 4664 }, { "epoch": 0.6572736879182811, "grad_norm": 1.2180651538392686, "learning_rate": 5.552231200104265e-06, "loss": 0.4255, "step": 4665 }, { "epoch": 0.6574145825995069, "grad_norm": 1.4102739874186252, "learning_rate": 5.548144300739107e-06, "loss": 0.5869, "step": 4666 }, { "epoch": 0.6575554772807326, "grad_norm": 1.0653002246458878, "learning_rate": 5.544058328543349e-06, "loss": 0.9085, "step": 4667 }, { "epoch": 0.6576963719619584, "grad_norm": 1.0550277157463213, "learning_rate": 5.539973284367977e-06, "loss": 0.8387, "step": 4668 }, { "epoch": 0.6578372666431842, "grad_norm": 1.1421611362407567, "learning_rate": 5.535889169063746e-06, "loss": 0.8524, "step": 4669 }, { "epoch": 0.65797816132441, "grad_norm": 1.1360582841836548, "learning_rate": 5.531805983481253e-06, "loss": 0.8527, "step": 4670 }, { "epoch": 0.6581190560056358, "grad_norm": 1.277059923630259, "learning_rate": 5.527723728470876e-06, "loss": 0.4733, "step": 4671 }, { "epoch": 0.6582599506868616, "grad_norm": 1.2127754850562795, "learning_rate": 5.523642404882818e-06, "loss": 0.6107, "step": 4672 }, { "epoch": 0.6584008453680874, "grad_norm": 1.0952864067745562, "learning_rate": 5.519562013567075e-06, "loss": 0.8918, "step": 4673 }, { "epoch": 0.6585417400493131, "grad_norm": 1.328771755771936, "learning_rate": 5.515482555373454e-06, "loss": 0.5299, "step": 4674 }, { "epoch": 0.6586826347305389, "grad_norm": 0.9982309967516275, "learning_rate": 5.511404031151561e-06, "loss": 0.8892, "step": 4675 }, { "epoch": 0.6588235294117647, "grad_norm": 0.9393696115022635, "learning_rate": 5.507326441750822e-06, "loss": 0.8544, "step": 4676 }, { "epoch": 0.6589644240929905, "grad_norm": 1.2218332348281635, "learning_rate": 5.5032497880204515e-06, "loss": 0.519, "step": 4677 }, { "epoch": 0.6591053187742163, "grad_norm": 1.1013728391127149, "learning_rate": 5.499174070809486e-06, "loss": 0.8422, "step": 4678 }, { "epoch": 0.6592462134554421, "grad_norm": 1.048557034652107, "learning_rate": 5.4950992909667525e-06, "loss": 0.9205, "step": 4679 }, { "epoch": 0.6593871081366678, "grad_norm": 1.0021924359662515, "learning_rate": 5.49102544934089e-06, "loss": 0.8967, "step": 4680 }, { "epoch": 0.6595280028178936, "grad_norm": 0.9609688883992061, "learning_rate": 5.486952546780336e-06, "loss": 0.8215, "step": 4681 }, { "epoch": 0.6596688974991194, "grad_norm": 1.057503592815011, "learning_rate": 5.482880584133343e-06, "loss": 0.8537, "step": 4682 }, { "epoch": 0.6598097921803452, "grad_norm": 1.022041237507866, "learning_rate": 5.478809562247962e-06, "loss": 0.8636, "step": 4683 }, { "epoch": 0.659950686861571, "grad_norm": 1.4886528332947446, "learning_rate": 5.474739481972044e-06, "loss": 0.5215, "step": 4684 }, { "epoch": 0.6600915815427968, "grad_norm": 1.071723473137144, "learning_rate": 5.4706703441532435e-06, "loss": 0.8624, "step": 4685 }, { "epoch": 0.6602324762240226, "grad_norm": 0.9454583533355485, "learning_rate": 5.466602149639033e-06, "loss": 0.8437, "step": 4686 }, { "epoch": 0.6603733709052483, "grad_norm": 1.0266720718569926, "learning_rate": 5.46253489927667e-06, "loss": 0.9264, "step": 4687 }, { "epoch": 0.6605142655864741, "grad_norm": 0.9796368160540537, "learning_rate": 5.4584685939132305e-06, "loss": 0.8954, "step": 4688 }, { "epoch": 0.6606551602676999, "grad_norm": 1.2709992448553342, "learning_rate": 5.454403234395584e-06, "loss": 0.4851, "step": 4689 }, { "epoch": 0.6607960549489257, "grad_norm": 1.0150630580134652, "learning_rate": 5.450338821570406e-06, "loss": 0.8139, "step": 4690 }, { "epoch": 0.6609369496301515, "grad_norm": 1.322699665376891, "learning_rate": 5.44627535628417e-06, "loss": 0.5722, "step": 4691 }, { "epoch": 0.6610778443113773, "grad_norm": 0.9746203824863182, "learning_rate": 5.442212839383165e-06, "loss": 0.877, "step": 4692 }, { "epoch": 0.6612187389926031, "grad_norm": 0.9383019315785077, "learning_rate": 5.438151271713472e-06, "loss": 0.8698, "step": 4693 }, { "epoch": 0.6613596336738288, "grad_norm": 1.070247162976022, "learning_rate": 5.434090654120974e-06, "loss": 0.8361, "step": 4694 }, { "epoch": 0.6615005283550546, "grad_norm": 1.2871709089284118, "learning_rate": 5.430030987451358e-06, "loss": 0.4669, "step": 4695 }, { "epoch": 0.6616414230362804, "grad_norm": 1.1406685013911633, "learning_rate": 5.4259722725501195e-06, "loss": 0.4949, "step": 4696 }, { "epoch": 0.6617823177175062, "grad_norm": 1.0211768172794655, "learning_rate": 5.421914510262543e-06, "loss": 0.9294, "step": 4697 }, { "epoch": 0.661923212398732, "grad_norm": 1.1290788931623987, "learning_rate": 5.41785770143373e-06, "loss": 0.8609, "step": 4698 }, { "epoch": 0.6620641070799578, "grad_norm": 1.0135325707373921, "learning_rate": 5.413801846908571e-06, "loss": 0.8066, "step": 4699 }, { "epoch": 0.6622050017611835, "grad_norm": 1.0488823470117963, "learning_rate": 5.40974694753176e-06, "loss": 0.8318, "step": 4700 }, { "epoch": 0.6623458964424093, "grad_norm": 1.0660957593006617, "learning_rate": 5.405693004147793e-06, "loss": 0.837, "step": 4701 }, { "epoch": 0.6624867911236351, "grad_norm": 1.0355545341530934, "learning_rate": 5.401640017600974e-06, "loss": 0.846, "step": 4702 }, { "epoch": 0.6626276858048609, "grad_norm": 1.034224670239409, "learning_rate": 5.3975879887353955e-06, "loss": 0.8472, "step": 4703 }, { "epoch": 0.6627685804860867, "grad_norm": 0.9460723011166079, "learning_rate": 5.393536918394966e-06, "loss": 0.8391, "step": 4704 }, { "epoch": 0.6629094751673125, "grad_norm": 1.0140252500563458, "learning_rate": 5.389486807423374e-06, "loss": 0.8691, "step": 4705 }, { "epoch": 0.6630503698485383, "grad_norm": 0.9986746426399875, "learning_rate": 5.38543765666412e-06, "loss": 0.824, "step": 4706 }, { "epoch": 0.663191264529764, "grad_norm": 1.1830603352883047, "learning_rate": 5.381389466960511e-06, "loss": 0.5128, "step": 4707 }, { "epoch": 0.6633321592109898, "grad_norm": 1.0523402411770708, "learning_rate": 5.377342239155639e-06, "loss": 0.8478, "step": 4708 }, { "epoch": 0.6634730538922156, "grad_norm": 1.124578714958433, "learning_rate": 5.37329597409241e-06, "loss": 0.8853, "step": 4709 }, { "epoch": 0.6636139485734414, "grad_norm": 0.9932449935180261, "learning_rate": 5.369250672613522e-06, "loss": 0.8082, "step": 4710 }, { "epoch": 0.6637548432546672, "grad_norm": 0.9193329528979043, "learning_rate": 5.365206335561468e-06, "loss": 0.8004, "step": 4711 }, { "epoch": 0.663895737935893, "grad_norm": 1.0769739787196901, "learning_rate": 5.361162963778545e-06, "loss": 0.8693, "step": 4712 }, { "epoch": 0.6640366326171186, "grad_norm": 1.2188692395525003, "learning_rate": 5.3571205581068585e-06, "loss": 0.5372, "step": 4713 }, { "epoch": 0.6641775272983445, "grad_norm": 0.9686849237756161, "learning_rate": 5.353079119388295e-06, "loss": 0.7416, "step": 4714 }, { "epoch": 0.6643184219795703, "grad_norm": 1.0594399232899192, "learning_rate": 5.3490386484645505e-06, "loss": 0.8881, "step": 4715 }, { "epoch": 0.664459316660796, "grad_norm": 1.0254897240280065, "learning_rate": 5.344999146177113e-06, "loss": 0.8402, "step": 4716 }, { "epoch": 0.6646002113420219, "grad_norm": 0.9928674916285614, "learning_rate": 5.340960613367281e-06, "loss": 0.7991, "step": 4717 }, { "epoch": 0.6647411060232477, "grad_norm": 1.0511711727190416, "learning_rate": 5.336923050876133e-06, "loss": 0.8673, "step": 4718 }, { "epoch": 0.6648820007044735, "grad_norm": 1.1468919739277232, "learning_rate": 5.332886459544566e-06, "loss": 0.8286, "step": 4719 }, { "epoch": 0.6650228953856991, "grad_norm": 1.1036744444302462, "learning_rate": 5.328850840213258e-06, "loss": 0.8515, "step": 4720 }, { "epoch": 0.6651637900669249, "grad_norm": 1.0855665280722944, "learning_rate": 5.32481619372269e-06, "loss": 0.8909, "step": 4721 }, { "epoch": 0.6653046847481507, "grad_norm": 1.1366573588232538, "learning_rate": 5.320782520913141e-06, "loss": 0.469, "step": 4722 }, { "epoch": 0.6654455794293765, "grad_norm": 1.0653624009680533, "learning_rate": 5.316749822624692e-06, "loss": 0.8736, "step": 4723 }, { "epoch": 0.6655864741106023, "grad_norm": 1.0090886245452542, "learning_rate": 5.312718099697212e-06, "loss": 0.8422, "step": 4724 }, { "epoch": 0.6657273687918281, "grad_norm": 1.0174241024667112, "learning_rate": 5.308687352970371e-06, "loss": 0.8558, "step": 4725 }, { "epoch": 0.665868263473054, "grad_norm": 1.032969694741924, "learning_rate": 5.304657583283633e-06, "loss": 0.8171, "step": 4726 }, { "epoch": 0.6660091581542796, "grad_norm": 0.927451674898121, "learning_rate": 5.30062879147627e-06, "loss": 0.8749, "step": 4727 }, { "epoch": 0.6661500528355054, "grad_norm": 1.0032882844069027, "learning_rate": 5.296600978387331e-06, "loss": 0.8121, "step": 4728 }, { "epoch": 0.6662909475167312, "grad_norm": 1.360932426497015, "learning_rate": 5.292574144855683e-06, "loss": 0.4755, "step": 4729 }, { "epoch": 0.666431842197957, "grad_norm": 1.2219506453587585, "learning_rate": 5.288548291719972e-06, "loss": 0.5133, "step": 4730 }, { "epoch": 0.6665727368791828, "grad_norm": 1.3844554397667588, "learning_rate": 5.284523419818646e-06, "loss": 0.5475, "step": 4731 }, { "epoch": 0.6667136315604086, "grad_norm": 1.3893980915262072, "learning_rate": 5.280499529989944e-06, "loss": 0.4684, "step": 4732 }, { "epoch": 0.6668545262416343, "grad_norm": 0.982594862193961, "learning_rate": 5.276476623071914e-06, "loss": 0.8496, "step": 4733 }, { "epoch": 0.6669954209228601, "grad_norm": 0.9327447963347285, "learning_rate": 5.272454699902378e-06, "loss": 0.8095, "step": 4734 }, { "epoch": 0.6671363156040859, "grad_norm": 1.0388989640835868, "learning_rate": 5.268433761318983e-06, "loss": 0.8536, "step": 4735 }, { "epoch": 0.6672772102853117, "grad_norm": 0.9858355561502353, "learning_rate": 5.264413808159132e-06, "loss": 0.8054, "step": 4736 }, { "epoch": 0.6674181049665375, "grad_norm": 1.1432244317972715, "learning_rate": 5.260394841260059e-06, "loss": 0.5138, "step": 4737 }, { "epoch": 0.6675589996477633, "grad_norm": 1.0549137389238759, "learning_rate": 5.25637686145877e-06, "loss": 0.8125, "step": 4738 }, { "epoch": 0.6676998943289891, "grad_norm": 1.1380647747258497, "learning_rate": 5.252359869592076e-06, "loss": 0.5379, "step": 4739 }, { "epoch": 0.6678407890102148, "grad_norm": 1.1865123232380523, "learning_rate": 5.248343866496581e-06, "loss": 0.6057, "step": 4740 }, { "epoch": 0.6679816836914406, "grad_norm": 1.0020175558065207, "learning_rate": 5.244328853008679e-06, "loss": 0.8227, "step": 4741 }, { "epoch": 0.6681225783726664, "grad_norm": 0.895202872998001, "learning_rate": 5.2403148299645545e-06, "loss": 0.831, "step": 4742 }, { "epoch": 0.6682634730538922, "grad_norm": 0.9847077638854569, "learning_rate": 5.2363017982002014e-06, "loss": 0.7986, "step": 4743 }, { "epoch": 0.668404367735118, "grad_norm": 1.0241439317543597, "learning_rate": 5.232289758551389e-06, "loss": 0.879, "step": 4744 }, { "epoch": 0.6685452624163438, "grad_norm": 0.9276643485405028, "learning_rate": 5.228278711853698e-06, "loss": 0.8239, "step": 4745 }, { "epoch": 0.6686861570975695, "grad_norm": 1.1252586580328023, "learning_rate": 5.224268658942479e-06, "loss": 0.8636, "step": 4746 }, { "epoch": 0.6688270517787953, "grad_norm": 0.9335586399322375, "learning_rate": 5.2202596006528996e-06, "loss": 0.8385, "step": 4747 }, { "epoch": 0.6689679464600211, "grad_norm": 1.1590725078572552, "learning_rate": 5.216251537819905e-06, "loss": 0.8672, "step": 4748 }, { "epoch": 0.6691088411412469, "grad_norm": 1.0405558170243927, "learning_rate": 5.212244471278234e-06, "loss": 0.8584, "step": 4749 }, { "epoch": 0.6692497358224727, "grad_norm": 0.930945631246954, "learning_rate": 5.208238401862431e-06, "loss": 0.816, "step": 4750 }, { "epoch": 0.6693906305036985, "grad_norm": 1.0357577493688872, "learning_rate": 5.204233330406819e-06, "loss": 0.9103, "step": 4751 }, { "epoch": 0.6695315251849243, "grad_norm": 1.0303694765095783, "learning_rate": 5.200229257745517e-06, "loss": 0.8403, "step": 4752 }, { "epoch": 0.66967241986615, "grad_norm": 0.9970966404924858, "learning_rate": 5.196226184712431e-06, "loss": 0.8997, "step": 4753 }, { "epoch": 0.6698133145473758, "grad_norm": 0.9691821155340995, "learning_rate": 5.192224112141275e-06, "loss": 0.8337, "step": 4754 }, { "epoch": 0.6699542092286016, "grad_norm": 1.3647194675992664, "learning_rate": 5.188223040865538e-06, "loss": 0.5169, "step": 4755 }, { "epoch": 0.6700951039098274, "grad_norm": 1.092178156673414, "learning_rate": 5.184222971718509e-06, "loss": 0.808, "step": 4756 }, { "epoch": 0.6702359985910532, "grad_norm": 0.9270060487487034, "learning_rate": 5.1802239055332595e-06, "loss": 0.8552, "step": 4757 }, { "epoch": 0.670376893272279, "grad_norm": 1.1181071865880274, "learning_rate": 5.176225843142666e-06, "loss": 0.3785, "step": 4758 }, { "epoch": 0.6705177879535048, "grad_norm": 0.8985427031987526, "learning_rate": 5.172228785379381e-06, "loss": 0.8209, "step": 4759 }, { "epoch": 0.6706586826347305, "grad_norm": 1.2464703545174087, "learning_rate": 5.168232733075862e-06, "loss": 0.4504, "step": 4760 }, { "epoch": 0.6707995773159563, "grad_norm": 1.1030390160743075, "learning_rate": 5.164237687064349e-06, "loss": 0.9137, "step": 4761 }, { "epoch": 0.6709404719971821, "grad_norm": 1.5369937240726699, "learning_rate": 5.16024364817687e-06, "loss": 0.603, "step": 4762 }, { "epoch": 0.6710813666784079, "grad_norm": 1.2468323413586146, "learning_rate": 5.1562506172452465e-06, "loss": 0.4386, "step": 4763 }, { "epoch": 0.6712222613596337, "grad_norm": 1.2161893604443221, "learning_rate": 5.152258595101095e-06, "loss": 0.8523, "step": 4764 }, { "epoch": 0.6713631560408595, "grad_norm": 1.114110662007113, "learning_rate": 5.148267582575812e-06, "loss": 0.899, "step": 4765 }, { "epoch": 0.6715040507220852, "grad_norm": 1.1870368627387875, "learning_rate": 5.144277580500599e-06, "loss": 0.5711, "step": 4766 }, { "epoch": 0.671644945403311, "grad_norm": 0.9724594485944896, "learning_rate": 5.140288589706422e-06, "loss": 0.8444, "step": 4767 }, { "epoch": 0.6717858400845368, "grad_norm": 0.9495735398459165, "learning_rate": 5.136300611024062e-06, "loss": 0.7545, "step": 4768 }, { "epoch": 0.6719267347657626, "grad_norm": 1.0986899820427896, "learning_rate": 5.132313645284073e-06, "loss": 0.8627, "step": 4769 }, { "epoch": 0.6720676294469884, "grad_norm": 1.4020328466455874, "learning_rate": 5.12832769331681e-06, "loss": 0.673, "step": 4770 }, { "epoch": 0.6722085241282142, "grad_norm": 1.1286159827644906, "learning_rate": 5.124342755952409e-06, "loss": 0.8956, "step": 4771 }, { "epoch": 0.67234941880944, "grad_norm": 1.2578001996836616, "learning_rate": 5.1203588340207935e-06, "loss": 0.4334, "step": 4772 }, { "epoch": 0.6724903134906657, "grad_norm": 1.0690892496049882, "learning_rate": 5.116375928351676e-06, "loss": 0.8977, "step": 4773 }, { "epoch": 0.6726312081718915, "grad_norm": 0.9333776894808183, "learning_rate": 5.1123940397745655e-06, "loss": 0.8718, "step": 4774 }, { "epoch": 0.6727721028531173, "grad_norm": 1.1792169501395526, "learning_rate": 5.108413169118748e-06, "loss": 0.4592, "step": 4775 }, { "epoch": 0.6729129975343431, "grad_norm": 0.9931544847663654, "learning_rate": 5.104433317213313e-06, "loss": 0.8928, "step": 4776 }, { "epoch": 0.6730538922155689, "grad_norm": 1.293254787818541, "learning_rate": 5.100454484887112e-06, "loss": 0.5603, "step": 4777 }, { "epoch": 0.6731947868967947, "grad_norm": 1.027526233548585, "learning_rate": 5.096476672968813e-06, "loss": 0.8355, "step": 4778 }, { "epoch": 0.6733356815780205, "grad_norm": 1.0699907389186112, "learning_rate": 5.092499882286849e-06, "loss": 0.836, "step": 4779 }, { "epoch": 0.6734765762592462, "grad_norm": 1.2886124384906352, "learning_rate": 5.088524113669458e-06, "loss": 0.4794, "step": 4780 }, { "epoch": 0.673617470940472, "grad_norm": 0.9581370217740257, "learning_rate": 5.084549367944652e-06, "loss": 0.8677, "step": 4781 }, { "epoch": 0.6737583656216978, "grad_norm": 1.0852834133343958, "learning_rate": 5.080575645940236e-06, "loss": 0.8389, "step": 4782 }, { "epoch": 0.6738992603029236, "grad_norm": 1.1775870693683694, "learning_rate": 5.0766029484837955e-06, "loss": 0.4588, "step": 4783 }, { "epoch": 0.6740401549841494, "grad_norm": 1.2912567272857043, "learning_rate": 5.0726312764027175e-06, "loss": 0.4889, "step": 4784 }, { "epoch": 0.6741810496653752, "grad_norm": 0.9730084085880853, "learning_rate": 5.068660630524153e-06, "loss": 0.8614, "step": 4785 }, { "epoch": 0.6743219443466009, "grad_norm": 1.0502187362439608, "learning_rate": 5.064691011675067e-06, "loss": 0.8954, "step": 4786 }, { "epoch": 0.6744628390278267, "grad_norm": 1.020622366348777, "learning_rate": 5.060722420682184e-06, "loss": 0.8659, "step": 4787 }, { "epoch": 0.6746037337090525, "grad_norm": 1.1225002317895785, "learning_rate": 5.05675485837203e-06, "loss": 0.7941, "step": 4788 }, { "epoch": 0.6747446283902783, "grad_norm": 0.9550919770373465, "learning_rate": 5.052788325570913e-06, "loss": 0.8105, "step": 4789 }, { "epoch": 0.6748855230715041, "grad_norm": 1.0321638601111502, "learning_rate": 5.0488228231049204e-06, "loss": 0.9145, "step": 4790 }, { "epoch": 0.6750264177527299, "grad_norm": 1.0843849099506844, "learning_rate": 5.0448583517999404e-06, "loss": 0.8283, "step": 4791 }, { "epoch": 0.6751673124339557, "grad_norm": 1.2177551737149637, "learning_rate": 5.040894912481631e-06, "loss": 0.5319, "step": 4792 }, { "epoch": 0.6753082071151814, "grad_norm": 1.0817225098506704, "learning_rate": 5.036932505975443e-06, "loss": 0.8593, "step": 4793 }, { "epoch": 0.6754491017964072, "grad_norm": 1.2110018687696276, "learning_rate": 5.032971133106605e-06, "loss": 0.8287, "step": 4794 }, { "epoch": 0.675589996477633, "grad_norm": 0.9680057064111639, "learning_rate": 5.029010794700144e-06, "loss": 0.8763, "step": 4795 }, { "epoch": 0.6757308911588588, "grad_norm": 1.0201852575279362, "learning_rate": 5.025051491580857e-06, "loss": 0.8737, "step": 4796 }, { "epoch": 0.6758717858400846, "grad_norm": 1.1002853242658124, "learning_rate": 5.021093224573339e-06, "loss": 0.8565, "step": 4797 }, { "epoch": 0.6760126805213104, "grad_norm": 1.3495328098644737, "learning_rate": 5.017135994501951e-06, "loss": 0.5265, "step": 4798 }, { "epoch": 0.676153575202536, "grad_norm": 1.0935281794003617, "learning_rate": 5.013179802190858e-06, "loss": 0.4211, "step": 4799 }, { "epoch": 0.6762944698837619, "grad_norm": 0.9604358812065114, "learning_rate": 5.009224648463991e-06, "loss": 0.7766, "step": 4800 }, { "epoch": 0.6764353645649877, "grad_norm": 1.1803939835083437, "learning_rate": 5.005270534145082e-06, "loss": 0.5252, "step": 4801 }, { "epoch": 0.6765762592462135, "grad_norm": 1.15408769281668, "learning_rate": 5.001317460057636e-06, "loss": 0.8242, "step": 4802 }, { "epoch": 0.6767171539274393, "grad_norm": 1.090128841866091, "learning_rate": 4.997365427024938e-06, "loss": 0.8699, "step": 4803 }, { "epoch": 0.676858048608665, "grad_norm": 1.17180002125287, "learning_rate": 4.993414435870064e-06, "loss": 0.5378, "step": 4804 }, { "epoch": 0.6769989432898909, "grad_norm": 1.1184448442945278, "learning_rate": 4.989464487415875e-06, "loss": 0.8408, "step": 4805 }, { "epoch": 0.6771398379711165, "grad_norm": 1.4151033578258658, "learning_rate": 4.9855155824850025e-06, "loss": 0.5361, "step": 4806 }, { "epoch": 0.6772807326523423, "grad_norm": 0.9920678964802039, "learning_rate": 4.98156772189988e-06, "loss": 0.8123, "step": 4807 }, { "epoch": 0.6774216273335681, "grad_norm": 1.1841263593678353, "learning_rate": 4.977620906482698e-06, "loss": 0.4991, "step": 4808 }, { "epoch": 0.6775625220147939, "grad_norm": 1.3397141827414567, "learning_rate": 4.973675137055451e-06, "loss": 0.5353, "step": 4809 }, { "epoch": 0.6777034166960197, "grad_norm": 1.113332370469884, "learning_rate": 4.969730414439905e-06, "loss": 0.8668, "step": 4810 }, { "epoch": 0.6778443113772455, "grad_norm": 1.1418173917776766, "learning_rate": 4.9657867394576166e-06, "loss": 0.8648, "step": 4811 }, { "epoch": 0.6779852060584713, "grad_norm": 1.294905689375336, "learning_rate": 4.961844112929915e-06, "loss": 0.4827, "step": 4812 }, { "epoch": 0.678126100739697, "grad_norm": 1.0622050483378798, "learning_rate": 4.957902535677912e-06, "loss": 0.8246, "step": 4813 }, { "epoch": 0.6782669954209228, "grad_norm": 1.4485457152797667, "learning_rate": 4.953962008522504e-06, "loss": 0.6039, "step": 4814 }, { "epoch": 0.6784078901021486, "grad_norm": 1.1312247736407341, "learning_rate": 4.950022532284372e-06, "loss": 0.4705, "step": 4815 }, { "epoch": 0.6785487847833744, "grad_norm": 1.2954229769516086, "learning_rate": 4.94608410778397e-06, "loss": 0.4624, "step": 4816 }, { "epoch": 0.6786896794646002, "grad_norm": 1.06542307791199, "learning_rate": 4.942146735841541e-06, "loss": 0.8842, "step": 4817 }, { "epoch": 0.678830574145826, "grad_norm": 1.346659019869496, "learning_rate": 4.938210417277104e-06, "loss": 0.5461, "step": 4818 }, { "epoch": 0.6789714688270517, "grad_norm": 1.4323650787387439, "learning_rate": 4.93427515291046e-06, "loss": 0.5479, "step": 4819 }, { "epoch": 0.6791123635082775, "grad_norm": 1.013348135679133, "learning_rate": 4.930340943561184e-06, "loss": 0.8857, "step": 4820 }, { "epoch": 0.6792532581895033, "grad_norm": 1.087827889448706, "learning_rate": 4.9264077900486464e-06, "loss": 0.8372, "step": 4821 }, { "epoch": 0.6793941528707291, "grad_norm": 1.0257567008984263, "learning_rate": 4.922475693191985e-06, "loss": 0.8064, "step": 4822 }, { "epoch": 0.6795350475519549, "grad_norm": 1.3254198242105417, "learning_rate": 4.91854465381012e-06, "loss": 0.4469, "step": 4823 }, { "epoch": 0.6796759422331807, "grad_norm": 1.3195085818323604, "learning_rate": 4.91461467272175e-06, "loss": 0.4947, "step": 4824 }, { "epoch": 0.6798168369144065, "grad_norm": 0.9921572283775735, "learning_rate": 4.910685750745364e-06, "loss": 0.839, "step": 4825 }, { "epoch": 0.6799577315956322, "grad_norm": 0.8696732192178178, "learning_rate": 4.906757888699213e-06, "loss": 0.8285, "step": 4826 }, { "epoch": 0.680098626276858, "grad_norm": 1.2976198385373054, "learning_rate": 4.902831087401344e-06, "loss": 0.5203, "step": 4827 }, { "epoch": 0.6802395209580838, "grad_norm": 1.1153550168001252, "learning_rate": 4.8989053476695734e-06, "loss": 0.8769, "step": 4828 }, { "epoch": 0.6803804156393096, "grad_norm": 1.2694711482036485, "learning_rate": 4.8949806703214966e-06, "loss": 0.47, "step": 4829 }, { "epoch": 0.6805213103205354, "grad_norm": 1.3365940213152852, "learning_rate": 4.891057056174488e-06, "loss": 0.5185, "step": 4830 }, { "epoch": 0.6806622050017612, "grad_norm": 1.0890616880881459, "learning_rate": 4.887134506045707e-06, "loss": 0.8349, "step": 4831 }, { "epoch": 0.6808030996829869, "grad_norm": 1.3110898611427444, "learning_rate": 4.883213020752086e-06, "loss": 0.5077, "step": 4832 }, { "epoch": 0.6809439943642127, "grad_norm": 1.2553670510152721, "learning_rate": 4.879292601110335e-06, "loss": 0.421, "step": 4833 }, { "epoch": 0.6810848890454385, "grad_norm": 1.3661890568582407, "learning_rate": 4.875373247936945e-06, "loss": 0.5434, "step": 4834 }, { "epoch": 0.6812257837266643, "grad_norm": 1.29674490953802, "learning_rate": 4.871454962048176e-06, "loss": 0.4745, "step": 4835 }, { "epoch": 0.6813666784078901, "grad_norm": 1.1096253603685824, "learning_rate": 4.867537744260083e-06, "loss": 0.8771, "step": 4836 }, { "epoch": 0.6815075730891159, "grad_norm": 1.103236821838495, "learning_rate": 4.8636215953884815e-06, "loss": 0.8476, "step": 4837 }, { "epoch": 0.6816484677703417, "grad_norm": 1.046865955349231, "learning_rate": 4.859706516248982e-06, "loss": 0.8398, "step": 4838 }, { "epoch": 0.6817893624515674, "grad_norm": 0.9754608399966258, "learning_rate": 4.855792507656947e-06, "loss": 0.8726, "step": 4839 }, { "epoch": 0.6819302571327932, "grad_norm": 1.176744234105368, "learning_rate": 4.851879570427541e-06, "loss": 0.5088, "step": 4840 }, { "epoch": 0.682071151814019, "grad_norm": 1.1102247452085836, "learning_rate": 4.847967705375689e-06, "loss": 0.8128, "step": 4841 }, { "epoch": 0.6822120464952448, "grad_norm": 1.2651328690355022, "learning_rate": 4.8440569133161066e-06, "loss": 0.4957, "step": 4842 }, { "epoch": 0.6823529411764706, "grad_norm": 1.2604000417183923, "learning_rate": 4.8401471950632715e-06, "loss": 0.4792, "step": 4843 }, { "epoch": 0.6824938358576964, "grad_norm": 1.1663084452782742, "learning_rate": 4.836238551431448e-06, "loss": 0.8473, "step": 4844 }, { "epoch": 0.6826347305389222, "grad_norm": 1.0551851276368254, "learning_rate": 4.832330983234667e-06, "loss": 0.808, "step": 4845 }, { "epoch": 0.6827756252201479, "grad_norm": 1.2163123197118033, "learning_rate": 4.82842449128675e-06, "loss": 0.4174, "step": 4846 }, { "epoch": 0.6829165199013737, "grad_norm": 0.9633672409370384, "learning_rate": 4.824519076401277e-06, "loss": 0.8007, "step": 4847 }, { "epoch": 0.6830574145825995, "grad_norm": 1.476498401109719, "learning_rate": 4.820614739391622e-06, "loss": 0.6488, "step": 4848 }, { "epoch": 0.6831983092638253, "grad_norm": 1.0832026548549092, "learning_rate": 4.816711481070918e-06, "loss": 0.858, "step": 4849 }, { "epoch": 0.6833392039450511, "grad_norm": 1.0726647457111953, "learning_rate": 4.812809302252084e-06, "loss": 0.9051, "step": 4850 }, { "epoch": 0.6834800986262769, "grad_norm": 1.0704024720634417, "learning_rate": 4.808908203747804e-06, "loss": 0.839, "step": 4851 }, { "epoch": 0.6836209933075026, "grad_norm": 0.9941090484252698, "learning_rate": 4.805008186370552e-06, "loss": 0.8352, "step": 4852 }, { "epoch": 0.6837618879887284, "grad_norm": 0.9598967144960032, "learning_rate": 4.801109250932566e-06, "loss": 0.8492, "step": 4853 }, { "epoch": 0.6839027826699542, "grad_norm": 1.160991615580608, "learning_rate": 4.797211398245859e-06, "loss": 0.483, "step": 4854 }, { "epoch": 0.68404367735118, "grad_norm": 1.1095109242997303, "learning_rate": 4.793314629122217e-06, "loss": 0.4495, "step": 4855 }, { "epoch": 0.6841845720324058, "grad_norm": 1.055837227436855, "learning_rate": 4.7894189443732116e-06, "loss": 0.8857, "step": 4856 }, { "epoch": 0.6843254667136316, "grad_norm": 1.0365290385564734, "learning_rate": 4.785524344810174e-06, "loss": 0.8291, "step": 4857 }, { "epoch": 0.6844663613948574, "grad_norm": 0.9240220841961103, "learning_rate": 4.781630831244224e-06, "loss": 0.8191, "step": 4858 }, { "epoch": 0.6846072560760831, "grad_norm": 1.016449238910929, "learning_rate": 4.777738404486242e-06, "loss": 0.8436, "step": 4859 }, { "epoch": 0.6847481507573089, "grad_norm": 0.9455946441335666, "learning_rate": 4.773847065346888e-06, "loss": 0.8501, "step": 4860 }, { "epoch": 0.6848890454385347, "grad_norm": 1.0305929145950856, "learning_rate": 4.769956814636593e-06, "loss": 0.8042, "step": 4861 }, { "epoch": 0.6850299401197605, "grad_norm": 1.0916109319277865, "learning_rate": 4.766067653165567e-06, "loss": 0.8443, "step": 4862 }, { "epoch": 0.6851708348009863, "grad_norm": 0.9892157229233849, "learning_rate": 4.76217958174379e-06, "loss": 0.8615, "step": 4863 }, { "epoch": 0.6853117294822121, "grad_norm": 0.9421674711663126, "learning_rate": 4.758292601181011e-06, "loss": 0.8267, "step": 4864 }, { "epoch": 0.6854526241634379, "grad_norm": 1.3354259431483986, "learning_rate": 4.754406712286754e-06, "loss": 0.5349, "step": 4865 }, { "epoch": 0.6855935188446636, "grad_norm": 1.1390760888952138, "learning_rate": 4.7505219158703235e-06, "loss": 0.8746, "step": 4866 }, { "epoch": 0.6857344135258894, "grad_norm": 1.4000918112936707, "learning_rate": 4.746638212740781e-06, "loss": 0.6902, "step": 4867 }, { "epoch": 0.6858753082071152, "grad_norm": 1.2601239100133241, "learning_rate": 4.7427556037069775e-06, "loss": 0.4664, "step": 4868 }, { "epoch": 0.686016202888341, "grad_norm": 1.031148167219892, "learning_rate": 4.738874089577526e-06, "loss": 0.8728, "step": 4869 }, { "epoch": 0.6861570975695668, "grad_norm": 1.367856130244454, "learning_rate": 4.734993671160809e-06, "loss": 0.5461, "step": 4870 }, { "epoch": 0.6862979922507926, "grad_norm": 1.2574154318189634, "learning_rate": 4.7311143492649855e-06, "loss": 0.6496, "step": 4871 }, { "epoch": 0.6864388869320183, "grad_norm": 1.2155785907111212, "learning_rate": 4.727236124697991e-06, "loss": 0.8464, "step": 4872 }, { "epoch": 0.6865797816132441, "grad_norm": 1.009510140452615, "learning_rate": 4.7233589982675244e-06, "loss": 0.8138, "step": 4873 }, { "epoch": 0.6867206762944699, "grad_norm": 1.0441673261582634, "learning_rate": 4.7194829707810585e-06, "loss": 0.8276, "step": 4874 }, { "epoch": 0.6868615709756957, "grad_norm": 1.0967737723620954, "learning_rate": 4.715608043045836e-06, "loss": 0.8855, "step": 4875 }, { "epoch": 0.6870024656569215, "grad_norm": 1.2684228200749779, "learning_rate": 4.71173421586887e-06, "loss": 0.455, "step": 4876 }, { "epoch": 0.6871433603381473, "grad_norm": 1.1130872527114961, "learning_rate": 4.707861490056952e-06, "loss": 0.94, "step": 4877 }, { "epoch": 0.6872842550193731, "grad_norm": 0.992766942791681, "learning_rate": 4.7039898664166314e-06, "loss": 0.8559, "step": 4878 }, { "epoch": 0.6874251497005988, "grad_norm": 1.0676691283578366, "learning_rate": 4.700119345754245e-06, "loss": 0.8159, "step": 4879 }, { "epoch": 0.6875660443818246, "grad_norm": 1.0136275387301028, "learning_rate": 4.696249928875884e-06, "loss": 0.8223, "step": 4880 }, { "epoch": 0.6877069390630504, "grad_norm": 1.4040752914000492, "learning_rate": 4.692381616587414e-06, "loss": 0.5227, "step": 4881 }, { "epoch": 0.6878478337442762, "grad_norm": 1.2780002036441143, "learning_rate": 4.688514409694473e-06, "loss": 0.5686, "step": 4882 }, { "epoch": 0.687988728425502, "grad_norm": 1.0054848897010484, "learning_rate": 4.684648309002472e-06, "loss": 0.8806, "step": 4883 }, { "epoch": 0.6881296231067278, "grad_norm": 0.9976254698069323, "learning_rate": 4.680783315316586e-06, "loss": 0.8421, "step": 4884 }, { "epoch": 0.6882705177879535, "grad_norm": 1.2793893704297263, "learning_rate": 4.676919429441758e-06, "loss": 0.4366, "step": 4885 }, { "epoch": 0.6884114124691793, "grad_norm": 1.2678518865077755, "learning_rate": 4.673056652182703e-06, "loss": 0.4627, "step": 4886 }, { "epoch": 0.688552307150405, "grad_norm": 1.328246637684583, "learning_rate": 4.669194984343913e-06, "loss": 0.6513, "step": 4887 }, { "epoch": 0.6886932018316309, "grad_norm": 1.3972496897453561, "learning_rate": 4.665334426729632e-06, "loss": 0.5342, "step": 4888 }, { "epoch": 0.6888340965128567, "grad_norm": 0.8910811991607169, "learning_rate": 4.661474980143889e-06, "loss": 0.8425, "step": 4889 }, { "epoch": 0.6889749911940825, "grad_norm": 0.976583232956338, "learning_rate": 4.657616645390475e-06, "loss": 0.7732, "step": 4890 }, { "epoch": 0.6891158858753083, "grad_norm": 1.0023924970645297, "learning_rate": 4.653759423272945e-06, "loss": 0.7738, "step": 4891 }, { "epoch": 0.6892567805565339, "grad_norm": 1.3288075641856492, "learning_rate": 4.649903314594625e-06, "loss": 0.534, "step": 4892 }, { "epoch": 0.6893976752377597, "grad_norm": 1.0076497882092668, "learning_rate": 4.646048320158617e-06, "loss": 0.8526, "step": 4893 }, { "epoch": 0.6895385699189855, "grad_norm": 1.0209644358973304, "learning_rate": 4.642194440767783e-06, "loss": 0.8451, "step": 4894 }, { "epoch": 0.6896794646002113, "grad_norm": 1.2033585067921377, "learning_rate": 4.638341677224752e-06, "loss": 0.5038, "step": 4895 }, { "epoch": 0.6898203592814371, "grad_norm": 0.9614352427141213, "learning_rate": 4.634490030331921e-06, "loss": 0.8771, "step": 4896 }, { "epoch": 0.689961253962663, "grad_norm": 1.0702280826832224, "learning_rate": 4.630639500891463e-06, "loss": 0.8677, "step": 4897 }, { "epoch": 0.6901021486438887, "grad_norm": 0.981400246514651, "learning_rate": 4.626790089705303e-06, "loss": 0.8047, "step": 4898 }, { "epoch": 0.6902430433251144, "grad_norm": 1.0220099557150328, "learning_rate": 4.6229417975751524e-06, "loss": 0.7863, "step": 4899 }, { "epoch": 0.6903839380063402, "grad_norm": 1.2016527021574528, "learning_rate": 4.619094625302473e-06, "loss": 0.4642, "step": 4900 }, { "epoch": 0.690524832687566, "grad_norm": 1.345544602600381, "learning_rate": 4.615248573688499e-06, "loss": 0.4854, "step": 4901 }, { "epoch": 0.6906657273687918, "grad_norm": 1.0182658922657206, "learning_rate": 4.611403643534229e-06, "loss": 0.8314, "step": 4902 }, { "epoch": 0.6908066220500176, "grad_norm": 1.104151272742343, "learning_rate": 4.607559835640438e-06, "loss": 0.795, "step": 4903 }, { "epoch": 0.6909475167312434, "grad_norm": 1.0249610824925464, "learning_rate": 4.603717150807652e-06, "loss": 0.8348, "step": 4904 }, { "epoch": 0.6910884114124691, "grad_norm": 1.1437549380410708, "learning_rate": 4.599875589836182e-06, "loss": 0.4376, "step": 4905 }, { "epoch": 0.6912293060936949, "grad_norm": 1.4400448810197732, "learning_rate": 4.596035153526077e-06, "loss": 0.5394, "step": 4906 }, { "epoch": 0.6913702007749207, "grad_norm": 1.033027189701184, "learning_rate": 4.592195842677184e-06, "loss": 0.8921, "step": 4907 }, { "epoch": 0.6915110954561465, "grad_norm": 1.3379102531306195, "learning_rate": 4.5883576580890896e-06, "loss": 0.5181, "step": 4908 }, { "epoch": 0.6916519901373723, "grad_norm": 1.2130402703356133, "learning_rate": 4.584520600561165e-06, "loss": 0.5474, "step": 4909 }, { "epoch": 0.6917928848185981, "grad_norm": 1.134959782073421, "learning_rate": 4.580684670892535e-06, "loss": 0.4571, "step": 4910 }, { "epoch": 0.6919337794998239, "grad_norm": 1.1787096936036232, "learning_rate": 4.576849869882093e-06, "loss": 0.4837, "step": 4911 }, { "epoch": 0.6920746741810496, "grad_norm": 1.0710017361631594, "learning_rate": 4.57301619832849e-06, "loss": 0.8758, "step": 4912 }, { "epoch": 0.6922155688622754, "grad_norm": 1.186879812442139, "learning_rate": 4.569183657030161e-06, "loss": 0.8151, "step": 4913 }, { "epoch": 0.6923564635435012, "grad_norm": 1.0209575530426596, "learning_rate": 4.565352246785282e-06, "loss": 0.8328, "step": 4914 }, { "epoch": 0.692497358224727, "grad_norm": 1.0791001473009507, "learning_rate": 4.5615219683918175e-06, "loss": 0.8743, "step": 4915 }, { "epoch": 0.6926382529059528, "grad_norm": 0.8934032378063308, "learning_rate": 4.557692822647473e-06, "loss": 0.8296, "step": 4916 }, { "epoch": 0.6927791475871786, "grad_norm": 1.1199235529478428, "learning_rate": 4.553864810349728e-06, "loss": 0.4951, "step": 4917 }, { "epoch": 0.6929200422684043, "grad_norm": 1.2286244040439533, "learning_rate": 4.5500379322958335e-06, "loss": 0.4954, "step": 4918 }, { "epoch": 0.6930609369496301, "grad_norm": 1.6422725263250806, "learning_rate": 4.546212189282791e-06, "loss": 0.4966, "step": 4919 }, { "epoch": 0.6932018316308559, "grad_norm": 1.0709268968335612, "learning_rate": 4.542387582107379e-06, "loss": 0.8506, "step": 4920 }, { "epoch": 0.6933427263120817, "grad_norm": 1.0266833321740343, "learning_rate": 4.53856411156613e-06, "loss": 0.835, "step": 4921 }, { "epoch": 0.6934836209933075, "grad_norm": 1.053186571270815, "learning_rate": 4.534741778455341e-06, "loss": 0.799, "step": 4922 }, { "epoch": 0.6936245156745333, "grad_norm": 0.982583035588133, "learning_rate": 4.530920583571068e-06, "loss": 0.8444, "step": 4923 }, { "epoch": 0.6937654103557591, "grad_norm": 1.259257360959457, "learning_rate": 4.527100527709146e-06, "loss": 0.4763, "step": 4924 }, { "epoch": 0.6939063050369848, "grad_norm": 1.075335276885201, "learning_rate": 4.523281611665157e-06, "loss": 0.8089, "step": 4925 }, { "epoch": 0.6940471997182106, "grad_norm": 1.002129892921009, "learning_rate": 4.5194638362344505e-06, "loss": 0.8062, "step": 4926 }, { "epoch": 0.6941880943994364, "grad_norm": 0.9726039502783609, "learning_rate": 4.515647202212135e-06, "loss": 0.8574, "step": 4927 }, { "epoch": 0.6943289890806622, "grad_norm": 0.9851367749804937, "learning_rate": 4.511831710393092e-06, "loss": 0.8678, "step": 4928 }, { "epoch": 0.694469883761888, "grad_norm": 1.3052962268550234, "learning_rate": 4.508017361571951e-06, "loss": 0.5475, "step": 4929 }, { "epoch": 0.6946107784431138, "grad_norm": 0.9801437973513726, "learning_rate": 4.5042041565431195e-06, "loss": 0.8177, "step": 4930 }, { "epoch": 0.6947516731243396, "grad_norm": 0.9811604883431577, "learning_rate": 4.500392096100752e-06, "loss": 0.846, "step": 4931 }, { "epoch": 0.6948925678055653, "grad_norm": 0.9891829245758452, "learning_rate": 4.496581181038771e-06, "loss": 0.8431, "step": 4932 }, { "epoch": 0.6950334624867911, "grad_norm": 1.1666099305362596, "learning_rate": 4.4927714121508545e-06, "loss": 0.5425, "step": 4933 }, { "epoch": 0.6951743571680169, "grad_norm": 1.0567685728036362, "learning_rate": 4.488962790230458e-06, "loss": 0.8475, "step": 4934 }, { "epoch": 0.6953152518492427, "grad_norm": 1.3249255449709547, "learning_rate": 4.485155316070778e-06, "loss": 0.5821, "step": 4935 }, { "epoch": 0.6954561465304685, "grad_norm": 1.0886306886720818, "learning_rate": 4.481348990464792e-06, "loss": 0.8726, "step": 4936 }, { "epoch": 0.6955970412116943, "grad_norm": 1.1244965045265587, "learning_rate": 4.477543814205214e-06, "loss": 0.8514, "step": 4937 }, { "epoch": 0.69573793589292, "grad_norm": 1.2376984806182165, "learning_rate": 4.473739788084542e-06, "loss": 0.4926, "step": 4938 }, { "epoch": 0.6958788305741458, "grad_norm": 1.4569384838440977, "learning_rate": 4.469936912895018e-06, "loss": 0.478, "step": 4939 }, { "epoch": 0.6960197252553716, "grad_norm": 1.1670367926042515, "learning_rate": 4.466135189428658e-06, "loss": 0.4978, "step": 4940 }, { "epoch": 0.6961606199365974, "grad_norm": 1.0371492835370366, "learning_rate": 4.462334618477227e-06, "loss": 0.8874, "step": 4941 }, { "epoch": 0.6963015146178232, "grad_norm": 1.0108569866187758, "learning_rate": 4.458535200832256e-06, "loss": 0.84, "step": 4942 }, { "epoch": 0.696442409299049, "grad_norm": 1.1771813816882788, "learning_rate": 4.454736937285028e-06, "loss": 0.5258, "step": 4943 }, { "epoch": 0.6965833039802748, "grad_norm": 0.9696576648871026, "learning_rate": 4.4509398286265995e-06, "loss": 0.8287, "step": 4944 }, { "epoch": 0.6967241986615005, "grad_norm": 1.0678476860293387, "learning_rate": 4.44714387564777e-06, "loss": 0.8106, "step": 4945 }, { "epoch": 0.6968650933427263, "grad_norm": 0.9493776091701109, "learning_rate": 4.443349079139121e-06, "loss": 0.8665, "step": 4946 }, { "epoch": 0.6970059880239521, "grad_norm": 0.9787888510611167, "learning_rate": 4.439555439890961e-06, "loss": 0.8731, "step": 4947 }, { "epoch": 0.6971468827051779, "grad_norm": 1.0163553100768687, "learning_rate": 4.435762958693388e-06, "loss": 0.8453, "step": 4948 }, { "epoch": 0.6972877773864037, "grad_norm": 1.2637502159920304, "learning_rate": 4.43197163633624e-06, "loss": 0.6195, "step": 4949 }, { "epoch": 0.6974286720676295, "grad_norm": 1.334601188691889, "learning_rate": 4.4281814736091245e-06, "loss": 0.5339, "step": 4950 }, { "epoch": 0.6975695667488553, "grad_norm": 0.9975597922651003, "learning_rate": 4.4243924713013996e-06, "loss": 0.8805, "step": 4951 }, { "epoch": 0.697710461430081, "grad_norm": 1.0585643045571738, "learning_rate": 4.420604630202187e-06, "loss": 0.8459, "step": 4952 }, { "epoch": 0.6978513561113068, "grad_norm": 1.0371281703346162, "learning_rate": 4.416817951100359e-06, "loss": 0.8387, "step": 4953 }, { "epoch": 0.6979922507925326, "grad_norm": 1.0433542175618842, "learning_rate": 4.413032434784561e-06, "loss": 0.8779, "step": 4954 }, { "epoch": 0.6981331454737584, "grad_norm": 0.9941573388024347, "learning_rate": 4.409248082043177e-06, "loss": 0.8623, "step": 4955 }, { "epoch": 0.6982740401549842, "grad_norm": 1.4049656470425576, "learning_rate": 4.405464893664368e-06, "loss": 0.5768, "step": 4956 }, { "epoch": 0.69841493483621, "grad_norm": 1.2338016025104726, "learning_rate": 4.4016828704360395e-06, "loss": 0.8539, "step": 4957 }, { "epoch": 0.6985558295174357, "grad_norm": 1.4811172324037976, "learning_rate": 4.397902013145849e-06, "loss": 0.5765, "step": 4958 }, { "epoch": 0.6986967241986615, "grad_norm": 0.9256229472194557, "learning_rate": 4.394122322581231e-06, "loss": 0.8736, "step": 4959 }, { "epoch": 0.6988376188798873, "grad_norm": 1.4578175382911942, "learning_rate": 4.390343799529359e-06, "loss": 0.5144, "step": 4960 }, { "epoch": 0.6989785135611131, "grad_norm": 1.0092503095860557, "learning_rate": 4.386566444777176e-06, "loss": 0.8684, "step": 4961 }, { "epoch": 0.6991194082423389, "grad_norm": 0.8649347620680106, "learning_rate": 4.382790259111372e-06, "loss": 0.8251, "step": 4962 }, { "epoch": 0.6992603029235647, "grad_norm": 1.044079131874654, "learning_rate": 4.3790152433184e-06, "loss": 0.8464, "step": 4963 }, { "epoch": 0.6994011976047905, "grad_norm": 1.116550501606131, "learning_rate": 4.37524139818446e-06, "loss": 0.8758, "step": 4964 }, { "epoch": 0.6995420922860162, "grad_norm": 1.1009610552995057, "learning_rate": 4.3714687244955245e-06, "loss": 0.8932, "step": 4965 }, { "epoch": 0.699682986967242, "grad_norm": 1.2284643697681288, "learning_rate": 4.367697223037304e-06, "loss": 0.8803, "step": 4966 }, { "epoch": 0.6998238816484678, "grad_norm": 0.9703802146925732, "learning_rate": 4.363926894595285e-06, "loss": 0.8249, "step": 4967 }, { "epoch": 0.6999647763296936, "grad_norm": 1.2283232729577507, "learning_rate": 4.3601577399546835e-06, "loss": 0.4576, "step": 4968 }, { "epoch": 0.7001056710109194, "grad_norm": 1.009228530428466, "learning_rate": 4.356389759900495e-06, "loss": 0.8074, "step": 4969 }, { "epoch": 0.7002465656921452, "grad_norm": 1.0546565357349822, "learning_rate": 4.352622955217456e-06, "loss": 0.8488, "step": 4970 }, { "epoch": 0.7003874603733709, "grad_norm": 0.9448940422744643, "learning_rate": 4.3488573266900705e-06, "loss": 0.8676, "step": 4971 }, { "epoch": 0.7005283550545967, "grad_norm": 1.1778340688695235, "learning_rate": 4.345092875102583e-06, "loss": 0.5076, "step": 4972 }, { "epoch": 0.7006692497358225, "grad_norm": 0.9088711383064024, "learning_rate": 4.341329601239005e-06, "loss": 0.8581, "step": 4973 }, { "epoch": 0.7008101444170483, "grad_norm": 0.9402985027556263, "learning_rate": 4.33756750588309e-06, "loss": 0.8349, "step": 4974 }, { "epoch": 0.700951039098274, "grad_norm": 0.9074316805150938, "learning_rate": 4.333806589818364e-06, "loss": 0.8015, "step": 4975 }, { "epoch": 0.7010919337794999, "grad_norm": 1.1016169562008433, "learning_rate": 4.3300468538280895e-06, "loss": 0.7975, "step": 4976 }, { "epoch": 0.7012328284607257, "grad_norm": 1.0594942597450334, "learning_rate": 4.3262882986953015e-06, "loss": 0.7979, "step": 4977 }, { "epoch": 0.7013737231419513, "grad_norm": 1.035288782010999, "learning_rate": 4.3225309252027636e-06, "loss": 0.8649, "step": 4978 }, { "epoch": 0.7015146178231771, "grad_norm": 1.1255722993579957, "learning_rate": 4.31877473413302e-06, "loss": 0.8496, "step": 4979 }, { "epoch": 0.701655512504403, "grad_norm": 1.258406377643974, "learning_rate": 4.315019726268349e-06, "loss": 0.4407, "step": 4980 }, { "epoch": 0.7017964071856287, "grad_norm": 1.006539982935423, "learning_rate": 4.311265902390798e-06, "loss": 0.8726, "step": 4981 }, { "epoch": 0.7019373018668545, "grad_norm": 1.1398025264540885, "learning_rate": 4.307513263282156e-06, "loss": 0.449, "step": 4982 }, { "epoch": 0.7020781965480803, "grad_norm": 1.3123629747569474, "learning_rate": 4.303761809723971e-06, "loss": 0.4819, "step": 4983 }, { "epoch": 0.7022190912293061, "grad_norm": 0.990761355005753, "learning_rate": 4.300011542497538e-06, "loss": 0.8187, "step": 4984 }, { "epoch": 0.7023599859105318, "grad_norm": 1.1860019637031272, "learning_rate": 4.296262462383916e-06, "loss": 0.8888, "step": 4985 }, { "epoch": 0.7025008805917576, "grad_norm": 1.2177496391385667, "learning_rate": 4.292514570163905e-06, "loss": 0.4511, "step": 4986 }, { "epoch": 0.7026417752729834, "grad_norm": 1.020221635832475, "learning_rate": 4.288767866618068e-06, "loss": 0.8583, "step": 4987 }, { "epoch": 0.7027826699542092, "grad_norm": 1.4604040143165784, "learning_rate": 4.285022352526713e-06, "loss": 0.53, "step": 4988 }, { "epoch": 0.702923564635435, "grad_norm": 1.0040312511741012, "learning_rate": 4.281278028669902e-06, "loss": 0.852, "step": 4989 }, { "epoch": 0.7030644593166608, "grad_norm": 0.9488148527908292, "learning_rate": 4.277534895827446e-06, "loss": 0.8631, "step": 4990 }, { "epoch": 0.7032053539978865, "grad_norm": 0.90654050257728, "learning_rate": 4.273792954778921e-06, "loss": 0.8782, "step": 4991 }, { "epoch": 0.7033462486791123, "grad_norm": 0.9928587274178909, "learning_rate": 4.270052206303637e-06, "loss": 0.8658, "step": 4992 }, { "epoch": 0.7034871433603381, "grad_norm": 1.1540379156691378, "learning_rate": 4.26631265118067e-06, "loss": 0.902, "step": 4993 }, { "epoch": 0.7036280380415639, "grad_norm": 1.0924625736824138, "learning_rate": 4.2625742901888335e-06, "loss": 0.4533, "step": 4994 }, { "epoch": 0.7037689327227897, "grad_norm": 0.9526272092072869, "learning_rate": 4.258837124106709e-06, "loss": 0.8569, "step": 4995 }, { "epoch": 0.7039098274040155, "grad_norm": 1.0264564081380116, "learning_rate": 4.255101153712614e-06, "loss": 0.8768, "step": 4996 }, { "epoch": 0.7040507220852413, "grad_norm": 1.0297520732033385, "learning_rate": 4.251366379784629e-06, "loss": 0.7886, "step": 4997 }, { "epoch": 0.704191616766467, "grad_norm": 1.1144529273050872, "learning_rate": 4.247632803100579e-06, "loss": 0.8245, "step": 4998 }, { "epoch": 0.7043325114476928, "grad_norm": 0.8536408934043054, "learning_rate": 4.243900424438038e-06, "loss": 0.7764, "step": 4999 }, { "epoch": 0.7044734061289186, "grad_norm": 1.2834446353154816, "learning_rate": 4.240169244574333e-06, "loss": 0.4385, "step": 5000 }, { "epoch": 0.7046143008101444, "grad_norm": 1.036410016764778, "learning_rate": 4.2364392642865385e-06, "loss": 0.8228, "step": 5001 }, { "epoch": 0.7047551954913702, "grad_norm": 1.0467403679785912, "learning_rate": 4.232710484351488e-06, "loss": 0.8202, "step": 5002 }, { "epoch": 0.704896090172596, "grad_norm": 1.0084044049391137, "learning_rate": 4.228982905545757e-06, "loss": 0.8613, "step": 5003 }, { "epoch": 0.7050369848538217, "grad_norm": 0.9727248105922777, "learning_rate": 4.225256528645671e-06, "loss": 0.8259, "step": 5004 }, { "epoch": 0.7051778795350475, "grad_norm": 1.0068362402128952, "learning_rate": 4.221531354427306e-06, "loss": 0.8073, "step": 5005 }, { "epoch": 0.7053187742162733, "grad_norm": 1.0592396079162336, "learning_rate": 4.217807383666494e-06, "loss": 0.8546, "step": 5006 }, { "epoch": 0.7054596688974991, "grad_norm": 1.3548085157321106, "learning_rate": 4.214084617138802e-06, "loss": 0.5693, "step": 5007 }, { "epoch": 0.7056005635787249, "grad_norm": 0.9683428557253231, "learning_rate": 4.2103630556195655e-06, "loss": 0.8391, "step": 5008 }, { "epoch": 0.7057414582599507, "grad_norm": 0.9903387687460887, "learning_rate": 4.2066426998838514e-06, "loss": 0.8472, "step": 5009 }, { "epoch": 0.7058823529411765, "grad_norm": 0.9993743546781603, "learning_rate": 4.202923550706487e-06, "loss": 0.8569, "step": 5010 }, { "epoch": 0.7060232476224022, "grad_norm": 1.1208713191850137, "learning_rate": 4.199205608862037e-06, "loss": 0.898, "step": 5011 }, { "epoch": 0.706164142303628, "grad_norm": 1.1040553878982131, "learning_rate": 4.195488875124829e-06, "loss": 0.8765, "step": 5012 }, { "epoch": 0.7063050369848538, "grad_norm": 0.964103211535468, "learning_rate": 4.191773350268931e-06, "loss": 0.8104, "step": 5013 }, { "epoch": 0.7064459316660796, "grad_norm": 0.8959870050773364, "learning_rate": 4.188059035068156e-06, "loss": 0.8054, "step": 5014 }, { "epoch": 0.7065868263473054, "grad_norm": 1.281555723118161, "learning_rate": 4.184345930296068e-06, "loss": 0.7178, "step": 5015 }, { "epoch": 0.7067277210285312, "grad_norm": 0.9590043465773093, "learning_rate": 4.180634036725985e-06, "loss": 0.7813, "step": 5016 }, { "epoch": 0.706868615709757, "grad_norm": 1.0879043653697176, "learning_rate": 4.176923355130962e-06, "loss": 0.8352, "step": 5017 }, { "epoch": 0.7070095103909827, "grad_norm": 1.0287031121842685, "learning_rate": 4.173213886283814e-06, "loss": 0.788, "step": 5018 }, { "epoch": 0.7071504050722085, "grad_norm": 1.055446923880971, "learning_rate": 4.169505630957094e-06, "loss": 0.8162, "step": 5019 }, { "epoch": 0.7072912997534343, "grad_norm": 1.0895928685512164, "learning_rate": 4.165798589923102e-06, "loss": 0.8694, "step": 5020 }, { "epoch": 0.7074321944346601, "grad_norm": 1.0884383096567845, "learning_rate": 4.162092763953887e-06, "loss": 0.493, "step": 5021 }, { "epoch": 0.7075730891158859, "grad_norm": 1.1471488545427395, "learning_rate": 4.158388153821252e-06, "loss": 0.8461, "step": 5022 }, { "epoch": 0.7077139837971117, "grad_norm": 0.9524114391645218, "learning_rate": 4.154684760296737e-06, "loss": 0.8021, "step": 5023 }, { "epoch": 0.7078548784783374, "grad_norm": 1.0998320151717054, "learning_rate": 4.1509825841516325e-06, "loss": 0.8642, "step": 5024 }, { "epoch": 0.7079957731595632, "grad_norm": 1.0856447998920806, "learning_rate": 4.147281626156972e-06, "loss": 0.8938, "step": 5025 }, { "epoch": 0.708136667840789, "grad_norm": 1.170575955376404, "learning_rate": 4.143581887083547e-06, "loss": 0.4815, "step": 5026 }, { "epoch": 0.7082775625220148, "grad_norm": 1.0267149385455494, "learning_rate": 4.1398833677018755e-06, "loss": 0.8626, "step": 5027 }, { "epoch": 0.7084184572032406, "grad_norm": 1.0866029588503983, "learning_rate": 4.136186068782244e-06, "loss": 0.4503, "step": 5028 }, { "epoch": 0.7085593518844664, "grad_norm": 0.9890008694028661, "learning_rate": 4.132489991094668e-06, "loss": 0.8328, "step": 5029 }, { "epoch": 0.7087002465656922, "grad_norm": 0.9688659756141589, "learning_rate": 4.128795135408915e-06, "loss": 0.829, "step": 5030 }, { "epoch": 0.7088411412469179, "grad_norm": 1.00641474547167, "learning_rate": 4.125101502494493e-06, "loss": 0.8749, "step": 5031 }, { "epoch": 0.7089820359281437, "grad_norm": 1.2055454545920352, "learning_rate": 4.121409093120667e-06, "loss": 0.4809, "step": 5032 }, { "epoch": 0.7091229306093695, "grad_norm": 1.172781030064135, "learning_rate": 4.117717908056436e-06, "loss": 0.5076, "step": 5033 }, { "epoch": 0.7092638252905953, "grad_norm": 0.8691697219633747, "learning_rate": 4.114027948070547e-06, "loss": 0.7492, "step": 5034 }, { "epoch": 0.7094047199718211, "grad_norm": 1.0136729443217476, "learning_rate": 4.11033921393149e-06, "loss": 0.8305, "step": 5035 }, { "epoch": 0.7095456146530469, "grad_norm": 1.0731485464802146, "learning_rate": 4.106651706407508e-06, "loss": 0.8373, "step": 5036 }, { "epoch": 0.7096865093342727, "grad_norm": 0.9900636964226947, "learning_rate": 4.102965426266578e-06, "loss": 0.8603, "step": 5037 }, { "epoch": 0.7098274040154984, "grad_norm": 1.0099785421596277, "learning_rate": 4.099280374276432e-06, "loss": 0.8661, "step": 5038 }, { "epoch": 0.7099682986967242, "grad_norm": 1.1646030088471433, "learning_rate": 4.095596551204537e-06, "loss": 0.8817, "step": 5039 }, { "epoch": 0.71010919337795, "grad_norm": 1.0288772125275611, "learning_rate": 4.091913957818106e-06, "loss": 0.8719, "step": 5040 }, { "epoch": 0.7102500880591758, "grad_norm": 1.0555847873993562, "learning_rate": 4.0882325948841e-06, "loss": 0.8196, "step": 5041 }, { "epoch": 0.7103909827404016, "grad_norm": 1.0157680927829822, "learning_rate": 4.084552463169216e-06, "loss": 0.804, "step": 5042 }, { "epoch": 0.7105318774216274, "grad_norm": 1.3395666429240665, "learning_rate": 4.080873563439907e-06, "loss": 0.5684, "step": 5043 }, { "epoch": 0.7106727721028531, "grad_norm": 0.994646386612289, "learning_rate": 4.077195896462358e-06, "loss": 0.8499, "step": 5044 }, { "epoch": 0.7108136667840789, "grad_norm": 0.9908107763141615, "learning_rate": 4.073519463002502e-06, "loss": 0.8474, "step": 5045 }, { "epoch": 0.7109545614653047, "grad_norm": 0.9120275282014011, "learning_rate": 4.069844263826012e-06, "loss": 0.7971, "step": 5046 }, { "epoch": 0.7110954561465305, "grad_norm": 1.3812175170216758, "learning_rate": 4.066170299698311e-06, "loss": 0.5025, "step": 5047 }, { "epoch": 0.7112363508277563, "grad_norm": 1.2775432518520546, "learning_rate": 4.062497571384555e-06, "loss": 0.5306, "step": 5048 }, { "epoch": 0.7113772455089821, "grad_norm": 0.978449487538991, "learning_rate": 4.058826079649653e-06, "loss": 0.7974, "step": 5049 }, { "epoch": 0.7115181401902079, "grad_norm": 1.1640798733059163, "learning_rate": 4.05515582525825e-06, "loss": 0.8833, "step": 5050 }, { "epoch": 0.7116590348714336, "grad_norm": 1.3720390769682753, "learning_rate": 4.051486808974734e-06, "loss": 0.5205, "step": 5051 }, { "epoch": 0.7117999295526594, "grad_norm": 1.0369874910450385, "learning_rate": 4.047819031563232e-06, "loss": 0.7649, "step": 5052 }, { "epoch": 0.7119408242338852, "grad_norm": 1.0567248994950054, "learning_rate": 4.044152493787622e-06, "loss": 0.7562, "step": 5053 }, { "epoch": 0.712081718915111, "grad_norm": 1.131546815167748, "learning_rate": 4.040487196411517e-06, "loss": 0.4708, "step": 5054 }, { "epoch": 0.7122226135963368, "grad_norm": 1.0412417744689775, "learning_rate": 4.036823140198272e-06, "loss": 0.8964, "step": 5055 }, { "epoch": 0.7123635082775626, "grad_norm": 0.9269652963935887, "learning_rate": 4.033160325910983e-06, "loss": 0.7917, "step": 5056 }, { "epoch": 0.7125044029587883, "grad_norm": 1.0392723170556561, "learning_rate": 4.0294987543124955e-06, "loss": 0.8387, "step": 5057 }, { "epoch": 0.712645297640014, "grad_norm": 1.30844375608952, "learning_rate": 4.0258384261653805e-06, "loss": 0.5584, "step": 5058 }, { "epoch": 0.7127861923212399, "grad_norm": 1.0191651393207342, "learning_rate": 4.0221793422319685e-06, "loss": 0.8885, "step": 5059 }, { "epoch": 0.7129270870024657, "grad_norm": 1.1967239236849185, "learning_rate": 4.018521503274317e-06, "loss": 0.4599, "step": 5060 }, { "epoch": 0.7130679816836915, "grad_norm": 1.3001294338106064, "learning_rate": 4.0148649100542316e-06, "loss": 0.4668, "step": 5061 }, { "epoch": 0.7132088763649173, "grad_norm": 1.0604460987015798, "learning_rate": 4.011209563333248e-06, "loss": 0.8752, "step": 5062 }, { "epoch": 0.713349771046143, "grad_norm": 0.9560661952133691, "learning_rate": 4.0075554638726585e-06, "loss": 0.798, "step": 5063 }, { "epoch": 0.7134906657273687, "grad_norm": 0.9279237164344906, "learning_rate": 4.003902612433486e-06, "loss": 0.8615, "step": 5064 }, { "epoch": 0.7136315604085945, "grad_norm": 1.028642161755627, "learning_rate": 4.000251009776491e-06, "loss": 0.8714, "step": 5065 }, { "epoch": 0.7137724550898203, "grad_norm": 1.0058322960419912, "learning_rate": 3.996600656662177e-06, "loss": 0.8583, "step": 5066 }, { "epoch": 0.7139133497710461, "grad_norm": 1.1176972116349844, "learning_rate": 3.992951553850793e-06, "loss": 0.9066, "step": 5067 }, { "epoch": 0.714054244452272, "grad_norm": 0.9631021147043884, "learning_rate": 3.9893037021023175e-06, "loss": 0.8351, "step": 5068 }, { "epoch": 0.7141951391334977, "grad_norm": 1.1180363114019451, "learning_rate": 3.985657102176477e-06, "loss": 0.8239, "step": 5069 }, { "epoch": 0.7143360338147235, "grad_norm": 1.1922628628008067, "learning_rate": 3.982011754832733e-06, "loss": 0.5546, "step": 5070 }, { "epoch": 0.7144769284959492, "grad_norm": 0.9349808088038166, "learning_rate": 3.978367660830285e-06, "loss": 0.8814, "step": 5071 }, { "epoch": 0.714617823177175, "grad_norm": 0.8891628425154066, "learning_rate": 3.974724820928072e-06, "loss": 0.7946, "step": 5072 }, { "epoch": 0.7147587178584008, "grad_norm": 1.0265115719643811, "learning_rate": 3.971083235884777e-06, "loss": 0.8746, "step": 5073 }, { "epoch": 0.7148996125396266, "grad_norm": 1.0160536948547596, "learning_rate": 3.967442906458814e-06, "loss": 0.8196, "step": 5074 }, { "epoch": 0.7150405072208524, "grad_norm": 0.8834072130510515, "learning_rate": 3.963803833408348e-06, "loss": 0.836, "step": 5075 }, { "epoch": 0.7151814019020782, "grad_norm": 1.0434000462748556, "learning_rate": 3.9601660174912584e-06, "loss": 0.8108, "step": 5076 }, { "epoch": 0.7153222965833039, "grad_norm": 1.0082605908664208, "learning_rate": 3.95652945946519e-06, "loss": 0.8696, "step": 5077 }, { "epoch": 0.7154631912645297, "grad_norm": 1.1751913180154594, "learning_rate": 3.952894160087507e-06, "loss": 0.4758, "step": 5078 }, { "epoch": 0.7156040859457555, "grad_norm": 1.4012368893513223, "learning_rate": 3.949260120115323e-06, "loss": 0.5235, "step": 5079 }, { "epoch": 0.7157449806269813, "grad_norm": 1.373218252736316, "learning_rate": 3.945627340305484e-06, "loss": 0.5214, "step": 5080 }, { "epoch": 0.7158858753082071, "grad_norm": 0.9904099079967545, "learning_rate": 3.941995821414571e-06, "loss": 0.8537, "step": 5081 }, { "epoch": 0.7160267699894329, "grad_norm": 1.1109809889968223, "learning_rate": 3.938365564198903e-06, "loss": 0.8826, "step": 5082 }, { "epoch": 0.7161676646706587, "grad_norm": 1.3666413218884426, "learning_rate": 3.9347365694145445e-06, "loss": 0.5091, "step": 5083 }, { "epoch": 0.7163085593518844, "grad_norm": 1.114114745627179, "learning_rate": 3.931108837817289e-06, "loss": 0.8177, "step": 5084 }, { "epoch": 0.7164494540331102, "grad_norm": 1.2359917366716944, "learning_rate": 3.927482370162669e-06, "loss": 0.4676, "step": 5085 }, { "epoch": 0.716590348714336, "grad_norm": 0.9593450707467402, "learning_rate": 3.923857167205951e-06, "loss": 0.8846, "step": 5086 }, { "epoch": 0.7167312433955618, "grad_norm": 1.0824068334782693, "learning_rate": 3.9202332297021415e-06, "loss": 0.8584, "step": 5087 }, { "epoch": 0.7168721380767876, "grad_norm": 0.9828965981025415, "learning_rate": 3.9166105584059874e-06, "loss": 0.8426, "step": 5088 }, { "epoch": 0.7170130327580134, "grad_norm": 1.031190550289941, "learning_rate": 3.912989154071961e-06, "loss": 0.8476, "step": 5089 }, { "epoch": 0.7171539274392391, "grad_norm": 0.9460156444516562, "learning_rate": 3.909369017454284e-06, "loss": 0.8315, "step": 5090 }, { "epoch": 0.7172948221204649, "grad_norm": 1.2518157809805757, "learning_rate": 3.905750149306904e-06, "loss": 0.5749, "step": 5091 }, { "epoch": 0.7174357168016907, "grad_norm": 1.0511036094931903, "learning_rate": 3.902132550383507e-06, "loss": 0.8447, "step": 5092 }, { "epoch": 0.7175766114829165, "grad_norm": 1.1547016346146892, "learning_rate": 3.8985162214375115e-06, "loss": 0.8656, "step": 5093 }, { "epoch": 0.7177175061641423, "grad_norm": 1.0146581330956586, "learning_rate": 3.894901163222083e-06, "loss": 0.8477, "step": 5094 }, { "epoch": 0.7178584008453681, "grad_norm": 0.920768278725461, "learning_rate": 3.891287376490107e-06, "loss": 0.796, "step": 5095 }, { "epoch": 0.7179992955265939, "grad_norm": 0.9657317327684507, "learning_rate": 3.887674861994223e-06, "loss": 0.8577, "step": 5096 }, { "epoch": 0.7181401902078196, "grad_norm": 1.0381306907509182, "learning_rate": 3.884063620486778e-06, "loss": 0.8362, "step": 5097 }, { "epoch": 0.7182810848890454, "grad_norm": 1.0044694430954761, "learning_rate": 3.880453652719884e-06, "loss": 0.8578, "step": 5098 }, { "epoch": 0.7184219795702712, "grad_norm": 1.1525348021482467, "learning_rate": 3.876844959445366e-06, "loss": 0.809, "step": 5099 }, { "epoch": 0.718562874251497, "grad_norm": 1.2081347327925596, "learning_rate": 3.873237541414797e-06, "loss": 0.4956, "step": 5100 }, { "epoch": 0.7187037689327228, "grad_norm": 0.9974305093331098, "learning_rate": 3.869631399379476e-06, "loss": 0.7707, "step": 5101 }, { "epoch": 0.7188446636139486, "grad_norm": 1.1136173942899508, "learning_rate": 3.86602653409044e-06, "loss": 0.8894, "step": 5102 }, { "epoch": 0.7189855582951744, "grad_norm": 1.0047921439996954, "learning_rate": 3.862422946298455e-06, "loss": 0.855, "step": 5103 }, { "epoch": 0.7191264529764001, "grad_norm": 1.30678836985989, "learning_rate": 3.858820636754032e-06, "loss": 0.656, "step": 5104 }, { "epoch": 0.7192673476576259, "grad_norm": 1.1614284741482555, "learning_rate": 3.855219606207405e-06, "loss": 0.8597, "step": 5105 }, { "epoch": 0.7194082423388517, "grad_norm": 1.0644069163774412, "learning_rate": 3.8516198554085505e-06, "loss": 0.9043, "step": 5106 }, { "epoch": 0.7195491370200775, "grad_norm": 1.0443388733876366, "learning_rate": 3.8480213851071634e-06, "loss": 0.8904, "step": 5107 }, { "epoch": 0.7196900317013033, "grad_norm": 1.0933284945519142, "learning_rate": 3.8444241960526915e-06, "loss": 0.7727, "step": 5108 }, { "epoch": 0.7198309263825291, "grad_norm": 1.1892972003969273, "learning_rate": 3.8408282889943005e-06, "loss": 0.4986, "step": 5109 }, { "epoch": 0.7199718210637548, "grad_norm": 1.2555071404564424, "learning_rate": 3.837233664680901e-06, "loss": 0.5875, "step": 5110 }, { "epoch": 0.7201127157449806, "grad_norm": 0.9806939202313585, "learning_rate": 3.833640323861125e-06, "loss": 0.8143, "step": 5111 }, { "epoch": 0.7202536104262064, "grad_norm": 0.931136820744612, "learning_rate": 3.830048267283347e-06, "loss": 0.8383, "step": 5112 }, { "epoch": 0.7203945051074322, "grad_norm": 1.1910871516921813, "learning_rate": 3.8264574956956624e-06, "loss": 0.5301, "step": 5113 }, { "epoch": 0.720535399788658, "grad_norm": 1.0134484283248213, "learning_rate": 3.822868009845915e-06, "loss": 0.9046, "step": 5114 }, { "epoch": 0.7206762944698838, "grad_norm": 0.9727526513728477, "learning_rate": 3.819279810481664e-06, "loss": 0.853, "step": 5115 }, { "epoch": 0.7208171891511096, "grad_norm": 1.2475136410339693, "learning_rate": 3.81569289835022e-06, "loss": 0.4923, "step": 5116 }, { "epoch": 0.7209580838323353, "grad_norm": 0.9227451402900485, "learning_rate": 3.812107274198601e-06, "loss": 0.831, "step": 5117 }, { "epoch": 0.7210989785135611, "grad_norm": 1.3135849528200503, "learning_rate": 3.808522938773578e-06, "loss": 0.5081, "step": 5118 }, { "epoch": 0.7212398731947869, "grad_norm": 1.1135951319914628, "learning_rate": 3.8049398928216407e-06, "loss": 0.866, "step": 5119 }, { "epoch": 0.7213807678760127, "grad_norm": 1.071658377234787, "learning_rate": 3.8013581370890217e-06, "loss": 0.4117, "step": 5120 }, { "epoch": 0.7215216625572385, "grad_norm": 1.0187929718859245, "learning_rate": 3.797777672321674e-06, "loss": 0.8553, "step": 5121 }, { "epoch": 0.7216625572384643, "grad_norm": 1.1387232640948866, "learning_rate": 3.794198499265287e-06, "loss": 0.868, "step": 5122 }, { "epoch": 0.7218034519196901, "grad_norm": 1.0813557870072552, "learning_rate": 3.790620618665276e-06, "loss": 0.8455, "step": 5123 }, { "epoch": 0.7219443466009158, "grad_norm": 1.25002207707364, "learning_rate": 3.787044031266799e-06, "loss": 0.4489, "step": 5124 }, { "epoch": 0.7220852412821416, "grad_norm": 0.9824712120006361, "learning_rate": 3.7834687378147316e-06, "loss": 0.8042, "step": 5125 }, { "epoch": 0.7222261359633674, "grad_norm": 1.3570791849083927, "learning_rate": 3.7798947390536846e-06, "loss": 0.626, "step": 5126 }, { "epoch": 0.7223670306445932, "grad_norm": 1.1332704845232413, "learning_rate": 3.7763220357280085e-06, "loss": 0.8834, "step": 5127 }, { "epoch": 0.722507925325819, "grad_norm": 1.1404552409502724, "learning_rate": 3.772750628581762e-06, "loss": 0.9068, "step": 5128 }, { "epoch": 0.7226488200070448, "grad_norm": 1.3999917890364464, "learning_rate": 3.769180518358756e-06, "loss": 0.6238, "step": 5129 }, { "epoch": 0.7227897146882705, "grad_norm": 1.2671523822085655, "learning_rate": 3.7656117058025176e-06, "loss": 0.5331, "step": 5130 }, { "epoch": 0.7229306093694963, "grad_norm": 1.0516653654247055, "learning_rate": 3.762044191656314e-06, "loss": 0.8295, "step": 5131 }, { "epoch": 0.7230715040507221, "grad_norm": 1.0421714384001166, "learning_rate": 3.758477976663135e-06, "loss": 0.8391, "step": 5132 }, { "epoch": 0.7232123987319479, "grad_norm": 1.0993486318644483, "learning_rate": 3.754913061565699e-06, "loss": 0.8067, "step": 5133 }, { "epoch": 0.7233532934131737, "grad_norm": 0.927839482801225, "learning_rate": 3.7513494471064537e-06, "loss": 0.8025, "step": 5134 }, { "epoch": 0.7234941880943995, "grad_norm": 1.0359168349190326, "learning_rate": 3.747787134027584e-06, "loss": 0.8345, "step": 5135 }, { "epoch": 0.7236350827756253, "grad_norm": 1.1435806243422861, "learning_rate": 3.7442261230709933e-06, "loss": 0.742, "step": 5136 }, { "epoch": 0.723775977456851, "grad_norm": 1.2409169963275055, "learning_rate": 3.740666414978327e-06, "loss": 0.4649, "step": 5137 }, { "epoch": 0.7239168721380768, "grad_norm": 1.0515778803989981, "learning_rate": 3.7371080104909376e-06, "loss": 0.8781, "step": 5138 }, { "epoch": 0.7240577668193026, "grad_norm": 1.5088007928981118, "learning_rate": 3.733550910349928e-06, "loss": 0.4527, "step": 5139 }, { "epoch": 0.7241986615005284, "grad_norm": 1.0313967767397136, "learning_rate": 3.729995115296117e-06, "loss": 0.8263, "step": 5140 }, { "epoch": 0.7243395561817542, "grad_norm": 1.0262258222281488, "learning_rate": 3.726440626070058e-06, "loss": 0.7965, "step": 5141 }, { "epoch": 0.72448045086298, "grad_norm": 0.9856575070608193, "learning_rate": 3.7228874434120297e-06, "loss": 0.8759, "step": 5142 }, { "epoch": 0.7246213455442057, "grad_norm": 1.1161148195541777, "learning_rate": 3.719335568062037e-06, "loss": 0.8551, "step": 5143 }, { "epoch": 0.7247622402254315, "grad_norm": 0.8773157656834738, "learning_rate": 3.7157850007598096e-06, "loss": 0.838, "step": 5144 }, { "epoch": 0.7249031349066573, "grad_norm": 1.2695160772879786, "learning_rate": 3.7122357422448184e-06, "loss": 0.4915, "step": 5145 }, { "epoch": 0.725044029587883, "grad_norm": 1.019710606454619, "learning_rate": 3.7086877932562447e-06, "loss": 0.8429, "step": 5146 }, { "epoch": 0.7251849242691089, "grad_norm": 0.9945150424288466, "learning_rate": 3.705141154533016e-06, "loss": 0.8841, "step": 5147 }, { "epoch": 0.7253258189503347, "grad_norm": 0.9164855552554927, "learning_rate": 3.701595826813762e-06, "loss": 0.8515, "step": 5148 }, { "epoch": 0.7254667136315605, "grad_norm": 0.9944386722818856, "learning_rate": 3.6980518108368623e-06, "loss": 0.8122, "step": 5149 }, { "epoch": 0.7256076083127861, "grad_norm": 1.1719845195986083, "learning_rate": 3.6945091073404095e-06, "loss": 0.4813, "step": 5150 }, { "epoch": 0.725748502994012, "grad_norm": 1.1173873370945853, "learning_rate": 3.6909677170622328e-06, "loss": 0.8762, "step": 5151 }, { "epoch": 0.7258893976752377, "grad_norm": 1.0340012204445974, "learning_rate": 3.6874276407398803e-06, "loss": 0.9185, "step": 5152 }, { "epoch": 0.7260302923564635, "grad_norm": 1.3773464525559034, "learning_rate": 3.683888879110629e-06, "loss": 0.4757, "step": 5153 }, { "epoch": 0.7261711870376893, "grad_norm": 1.2605085603641475, "learning_rate": 3.6803514329114777e-06, "loss": 0.499, "step": 5154 }, { "epoch": 0.7263120817189151, "grad_norm": 0.9977248111736945, "learning_rate": 3.676815302879163e-06, "loss": 0.8311, "step": 5155 }, { "epoch": 0.726452976400141, "grad_norm": 1.2549853536192053, "learning_rate": 3.6732804897501327e-06, "loss": 0.6487, "step": 5156 }, { "epoch": 0.7265938710813666, "grad_norm": 1.442081550360718, "learning_rate": 3.6697469942605755e-06, "loss": 0.6327, "step": 5157 }, { "epoch": 0.7267347657625924, "grad_norm": 0.9516521327198261, "learning_rate": 3.666214817146394e-06, "loss": 0.8439, "step": 5158 }, { "epoch": 0.7268756604438182, "grad_norm": 1.2397066945708797, "learning_rate": 3.6626839591432193e-06, "loss": 0.5581, "step": 5159 }, { "epoch": 0.727016555125044, "grad_norm": 1.2311358237067762, "learning_rate": 3.659154420986405e-06, "loss": 0.4394, "step": 5160 }, { "epoch": 0.7271574498062698, "grad_norm": 1.0562944943735464, "learning_rate": 3.65562620341104e-06, "loss": 0.8515, "step": 5161 }, { "epoch": 0.7272983444874956, "grad_norm": 1.1075724191029883, "learning_rate": 3.6520993071519296e-06, "loss": 0.9058, "step": 5162 }, { "epoch": 0.7274392391687213, "grad_norm": 1.0533439192575849, "learning_rate": 3.6485737329436045e-06, "loss": 0.8697, "step": 5163 }, { "epoch": 0.7275801338499471, "grad_norm": 0.9479043253704637, "learning_rate": 3.6450494815203176e-06, "loss": 0.8157, "step": 5164 }, { "epoch": 0.7277210285311729, "grad_norm": 1.16500236348664, "learning_rate": 3.6415265536160584e-06, "loss": 0.9044, "step": 5165 }, { "epoch": 0.7278619232123987, "grad_norm": 0.9796815330015538, "learning_rate": 3.638004949964523e-06, "loss": 0.8511, "step": 5166 }, { "epoch": 0.7280028178936245, "grad_norm": 1.0297377261521736, "learning_rate": 3.634484671299151e-06, "loss": 0.8631, "step": 5167 }, { "epoch": 0.7281437125748503, "grad_norm": 0.9563773886655276, "learning_rate": 3.6309657183530933e-06, "loss": 0.8412, "step": 5168 }, { "epoch": 0.7282846072560761, "grad_norm": 1.2304893435183333, "learning_rate": 3.627448091859219e-06, "loss": 0.522, "step": 5169 }, { "epoch": 0.7284255019373018, "grad_norm": 1.0207251365880394, "learning_rate": 3.6239317925501393e-06, "loss": 0.9043, "step": 5170 }, { "epoch": 0.7285663966185276, "grad_norm": 1.0388910667676494, "learning_rate": 3.6204168211581724e-06, "loss": 0.8446, "step": 5171 }, { "epoch": 0.7287072912997534, "grad_norm": 0.9023803040181734, "learning_rate": 3.616903178415374e-06, "loss": 0.8825, "step": 5172 }, { "epoch": 0.7288481859809792, "grad_norm": 1.008976613768824, "learning_rate": 3.613390865053512e-06, "loss": 0.8529, "step": 5173 }, { "epoch": 0.728989080662205, "grad_norm": 1.05241584672971, "learning_rate": 3.6098798818040805e-06, "loss": 0.9046, "step": 5174 }, { "epoch": 0.7291299753434308, "grad_norm": 0.8508674968485195, "learning_rate": 3.606370229398295e-06, "loss": 0.8327, "step": 5175 }, { "epoch": 0.7292708700246565, "grad_norm": 0.9596546229059867, "learning_rate": 3.6028619085671023e-06, "loss": 0.8418, "step": 5176 }, { "epoch": 0.7294117647058823, "grad_norm": 0.9639123017502332, "learning_rate": 3.599354920041159e-06, "loss": 0.8355, "step": 5177 }, { "epoch": 0.7295526593871081, "grad_norm": 1.0938032734568794, "learning_rate": 3.5958492645508593e-06, "loss": 0.8804, "step": 5178 }, { "epoch": 0.7296935540683339, "grad_norm": 1.0322038582132602, "learning_rate": 3.5923449428263057e-06, "loss": 0.8257, "step": 5179 }, { "epoch": 0.7298344487495597, "grad_norm": 1.106900151486475, "learning_rate": 3.58884195559733e-06, "loss": 0.8602, "step": 5180 }, { "epoch": 0.7299753434307855, "grad_norm": 1.101303198858628, "learning_rate": 3.5853403035934795e-06, "loss": 0.8427, "step": 5181 }, { "epoch": 0.7301162381120113, "grad_norm": 0.9301631197730913, "learning_rate": 3.581839987544039e-06, "loss": 0.7879, "step": 5182 }, { "epoch": 0.730257132793237, "grad_norm": 1.3502722802721496, "learning_rate": 3.578341008177998e-06, "loss": 0.5688, "step": 5183 }, { "epoch": 0.7303980274744628, "grad_norm": 0.9511663108074125, "learning_rate": 3.5748433662240754e-06, "loss": 0.868, "step": 5184 }, { "epoch": 0.7305389221556886, "grad_norm": 0.904307592628937, "learning_rate": 3.5713470624107083e-06, "loss": 0.7818, "step": 5185 }, { "epoch": 0.7306798168369144, "grad_norm": 0.8829225482040773, "learning_rate": 3.5678520974660625e-06, "loss": 0.8042, "step": 5186 }, { "epoch": 0.7308207115181402, "grad_norm": 0.9598468186547889, "learning_rate": 3.5643584721180135e-06, "loss": 0.832, "step": 5187 }, { "epoch": 0.730961606199366, "grad_norm": 0.8737001297031649, "learning_rate": 3.5608661870941718e-06, "loss": 0.796, "step": 5188 }, { "epoch": 0.7311025008805918, "grad_norm": 1.0740263769402547, "learning_rate": 3.5573752431218555e-06, "loss": 0.848, "step": 5189 }, { "epoch": 0.7312433955618175, "grad_norm": 1.1788728934605837, "learning_rate": 3.553885640928111e-06, "loss": 0.8956, "step": 5190 }, { "epoch": 0.7313842902430433, "grad_norm": 1.0267831398214968, "learning_rate": 3.5503973812397e-06, "loss": 0.8506, "step": 5191 }, { "epoch": 0.7315251849242691, "grad_norm": 1.2541107480186853, "learning_rate": 3.546910464783111e-06, "loss": 0.4805, "step": 5192 }, { "epoch": 0.7316660796054949, "grad_norm": 1.1076261020968357, "learning_rate": 3.5434248922845505e-06, "loss": 0.8866, "step": 5193 }, { "epoch": 0.7318069742867207, "grad_norm": 1.0314224629031628, "learning_rate": 3.539940664469942e-06, "loss": 0.8122, "step": 5194 }, { "epoch": 0.7319478689679465, "grad_norm": 1.0297363370289394, "learning_rate": 3.5364577820649284e-06, "loss": 0.867, "step": 5195 }, { "epoch": 0.7320887636491722, "grad_norm": 1.0386652423510796, "learning_rate": 3.5329762457948803e-06, "loss": 0.8116, "step": 5196 }, { "epoch": 0.732229658330398, "grad_norm": 1.2296997089435588, "learning_rate": 3.5294960563848777e-06, "loss": 0.4578, "step": 5197 }, { "epoch": 0.7323705530116238, "grad_norm": 1.303708995173657, "learning_rate": 3.5260172145597314e-06, "loss": 0.4449, "step": 5198 }, { "epoch": 0.7325114476928496, "grad_norm": 1.1202902762178655, "learning_rate": 3.522539721043963e-06, "loss": 0.874, "step": 5199 }, { "epoch": 0.7326523423740754, "grad_norm": 1.3131053783093827, "learning_rate": 3.519063576561813e-06, "loss": 0.4977, "step": 5200 }, { "epoch": 0.7327932370553012, "grad_norm": 1.0675247223157893, "learning_rate": 3.5155887818372414e-06, "loss": 0.8598, "step": 5201 }, { "epoch": 0.732934131736527, "grad_norm": 1.115464364896787, "learning_rate": 3.5121153375939364e-06, "loss": 0.8304, "step": 5202 }, { "epoch": 0.7330750264177527, "grad_norm": 1.2240111770269089, "learning_rate": 3.5086432445552944e-06, "loss": 0.4558, "step": 5203 }, { "epoch": 0.7332159210989785, "grad_norm": 1.060246728809818, "learning_rate": 3.505172503444433e-06, "loss": 0.7996, "step": 5204 }, { "epoch": 0.7333568157802043, "grad_norm": 1.2020206842429553, "learning_rate": 3.501703114984186e-06, "loss": 0.7759, "step": 5205 }, { "epoch": 0.7334977104614301, "grad_norm": 1.011831049576529, "learning_rate": 3.4982350798971156e-06, "loss": 0.8879, "step": 5206 }, { "epoch": 0.7336386051426559, "grad_norm": 1.0479313854225742, "learning_rate": 3.494768398905487e-06, "loss": 0.8415, "step": 5207 }, { "epoch": 0.7337794998238817, "grad_norm": 0.8861335503909703, "learning_rate": 3.491303072731299e-06, "loss": 0.7691, "step": 5208 }, { "epoch": 0.7339203945051075, "grad_norm": 1.010595720116359, "learning_rate": 3.4878391020962577e-06, "loss": 0.8434, "step": 5209 }, { "epoch": 0.7340612891863332, "grad_norm": 1.1093103709918843, "learning_rate": 3.484376487721789e-06, "loss": 0.8542, "step": 5210 }, { "epoch": 0.734202183867559, "grad_norm": 1.426815061226442, "learning_rate": 3.480915230329037e-06, "loss": 0.5393, "step": 5211 }, { "epoch": 0.7343430785487848, "grad_norm": 1.2159158848903098, "learning_rate": 3.4774553306388616e-06, "loss": 0.4712, "step": 5212 }, { "epoch": 0.7344839732300106, "grad_norm": 1.152582119804516, "learning_rate": 3.4739967893718464e-06, "loss": 0.4593, "step": 5213 }, { "epoch": 0.7346248679112364, "grad_norm": 1.06994818453041, "learning_rate": 3.4705396072482843e-06, "loss": 0.8202, "step": 5214 }, { "epoch": 0.7347657625924622, "grad_norm": 0.9696208195319758, "learning_rate": 3.4670837849881887e-06, "loss": 0.8144, "step": 5215 }, { "epoch": 0.7349066572736879, "grad_norm": 1.2103897556719, "learning_rate": 3.463629323311287e-06, "loss": 0.4486, "step": 5216 }, { "epoch": 0.7350475519549137, "grad_norm": 1.068683020638997, "learning_rate": 3.4601762229370305e-06, "loss": 0.8662, "step": 5217 }, { "epoch": 0.7351884466361395, "grad_norm": 1.5211460179134217, "learning_rate": 3.456724484584576e-06, "loss": 0.6047, "step": 5218 }, { "epoch": 0.7353293413173653, "grad_norm": 1.0302763957141414, "learning_rate": 3.4532741089728094e-06, "loss": 0.8319, "step": 5219 }, { "epoch": 0.7354702359985911, "grad_norm": 1.007965354313176, "learning_rate": 3.4498250968203217e-06, "loss": 0.8104, "step": 5220 }, { "epoch": 0.7356111306798169, "grad_norm": 1.0520242293439455, "learning_rate": 3.4463774488454262e-06, "loss": 0.8165, "step": 5221 }, { "epoch": 0.7357520253610427, "grad_norm": 1.2697892998503186, "learning_rate": 3.442931165766146e-06, "loss": 0.5297, "step": 5222 }, { "epoch": 0.7358929200422684, "grad_norm": 1.375797659490066, "learning_rate": 3.439486248300229e-06, "loss": 0.4894, "step": 5223 }, { "epoch": 0.7360338147234942, "grad_norm": 1.286752895036414, "learning_rate": 3.436042697165133e-06, "loss": 0.482, "step": 5224 }, { "epoch": 0.73617470940472, "grad_norm": 1.1437304978895724, "learning_rate": 3.4326005130780304e-06, "loss": 0.835, "step": 5225 }, { "epoch": 0.7363156040859458, "grad_norm": 1.0372566111857635, "learning_rate": 3.4291596967558084e-06, "loss": 0.8996, "step": 5226 }, { "epoch": 0.7364564987671716, "grad_norm": 1.0846002577591072, "learning_rate": 3.4257202489150775e-06, "loss": 0.391, "step": 5227 }, { "epoch": 0.7365973934483974, "grad_norm": 1.0275212354597802, "learning_rate": 3.4222821702721497e-06, "loss": 0.8311, "step": 5228 }, { "epoch": 0.736738288129623, "grad_norm": 0.9746438429058321, "learning_rate": 3.418845461543068e-06, "loss": 0.8229, "step": 5229 }, { "epoch": 0.7368791828108489, "grad_norm": 0.9086934147039529, "learning_rate": 3.4154101234435754e-06, "loss": 0.8444, "step": 5230 }, { "epoch": 0.7370200774920747, "grad_norm": 1.3317998985062205, "learning_rate": 3.4119761566891376e-06, "loss": 0.5408, "step": 5231 }, { "epoch": 0.7371609721733005, "grad_norm": 1.055359374169135, "learning_rate": 3.4085435619949283e-06, "loss": 0.8395, "step": 5232 }, { "epoch": 0.7373018668545263, "grad_norm": 1.0856321458537992, "learning_rate": 3.4051123400758455e-06, "loss": 0.8468, "step": 5233 }, { "epoch": 0.7374427615357521, "grad_norm": 1.03156306545353, "learning_rate": 3.401682491646494e-06, "loss": 0.8427, "step": 5234 }, { "epoch": 0.7375836562169779, "grad_norm": 0.9644268065314501, "learning_rate": 3.398254017421193e-06, "loss": 0.8196, "step": 5235 }, { "epoch": 0.7377245508982035, "grad_norm": 0.9883613594366965, "learning_rate": 3.3948269181139726e-06, "loss": 0.8297, "step": 5236 }, { "epoch": 0.7378654455794293, "grad_norm": 1.1809387727118315, "learning_rate": 3.3914011944385883e-06, "loss": 0.4859, "step": 5237 }, { "epoch": 0.7380063402606551, "grad_norm": 0.9727435802877266, "learning_rate": 3.387976847108494e-06, "loss": 0.8098, "step": 5238 }, { "epoch": 0.738147234941881, "grad_norm": 1.0575779366977267, "learning_rate": 3.3845538768368712e-06, "loss": 0.831, "step": 5239 }, { "epoch": 0.7382881296231067, "grad_norm": 1.0526696307692647, "learning_rate": 3.3811322843366045e-06, "loss": 0.8312, "step": 5240 }, { "epoch": 0.7384290243043325, "grad_norm": 0.9931788346095196, "learning_rate": 3.377712070320294e-06, "loss": 0.8263, "step": 5241 }, { "epoch": 0.7385699189855583, "grad_norm": 1.3003303222421772, "learning_rate": 3.37429323550025e-06, "loss": 0.5597, "step": 5242 }, { "epoch": 0.738710813666784, "grad_norm": 1.0984717202665204, "learning_rate": 3.3708757805885062e-06, "loss": 0.849, "step": 5243 }, { "epoch": 0.7388517083480098, "grad_norm": 1.3002010548751755, "learning_rate": 3.367459706296795e-06, "loss": 0.6276, "step": 5244 }, { "epoch": 0.7389926030292356, "grad_norm": 1.1527632514634492, "learning_rate": 3.3640450133365775e-06, "loss": 0.4695, "step": 5245 }, { "epoch": 0.7391334977104614, "grad_norm": 1.2015265429834006, "learning_rate": 3.360631702419004e-06, "loss": 0.8806, "step": 5246 }, { "epoch": 0.7392743923916872, "grad_norm": 0.9646359796163242, "learning_rate": 3.3572197742549618e-06, "loss": 0.8417, "step": 5247 }, { "epoch": 0.739415287072913, "grad_norm": 0.9658757366199386, "learning_rate": 3.3538092295550305e-06, "loss": 0.8476, "step": 5248 }, { "epoch": 0.7395561817541387, "grad_norm": 0.9175309758862557, "learning_rate": 3.350400069029518e-06, "loss": 0.7991, "step": 5249 }, { "epoch": 0.7396970764353645, "grad_norm": 1.183490386138586, "learning_rate": 3.3469922933884326e-06, "loss": 0.8172, "step": 5250 }, { "epoch": 0.7398379711165903, "grad_norm": 1.0782414605863428, "learning_rate": 3.3435859033414973e-06, "loss": 0.8755, "step": 5251 }, { "epoch": 0.7399788657978161, "grad_norm": 0.9302542656519757, "learning_rate": 3.340180899598147e-06, "loss": 0.8333, "step": 5252 }, { "epoch": 0.7401197604790419, "grad_norm": 1.113547073608275, "learning_rate": 3.3367772828675226e-06, "loss": 0.4469, "step": 5253 }, { "epoch": 0.7402606551602677, "grad_norm": 1.0363495933939093, "learning_rate": 3.3333750538584897e-06, "loss": 0.8203, "step": 5254 }, { "epoch": 0.7404015498414935, "grad_norm": 0.9667974955076053, "learning_rate": 3.329974213279612e-06, "loss": 0.8676, "step": 5255 }, { "epoch": 0.7405424445227192, "grad_norm": 1.3898221668173523, "learning_rate": 3.3265747618391685e-06, "loss": 0.4328, "step": 5256 }, { "epoch": 0.740683339203945, "grad_norm": 1.1063686693161436, "learning_rate": 3.3231767002451466e-06, "loss": 0.4321, "step": 5257 }, { "epoch": 0.7408242338851708, "grad_norm": 1.2395573879517223, "learning_rate": 3.3197800292052516e-06, "loss": 0.4557, "step": 5258 }, { "epoch": 0.7409651285663966, "grad_norm": 1.1217130032049916, "learning_rate": 3.3163847494268874e-06, "loss": 0.3878, "step": 5259 }, { "epoch": 0.7411060232476224, "grad_norm": 1.0316048888648932, "learning_rate": 3.3129908616171834e-06, "loss": 0.849, "step": 5260 }, { "epoch": 0.7412469179288482, "grad_norm": 1.1250993669784664, "learning_rate": 3.3095983664829645e-06, "loss": 0.8373, "step": 5261 }, { "epoch": 0.7413878126100739, "grad_norm": 1.009272432899475, "learning_rate": 3.306207264730773e-06, "loss": 0.8956, "step": 5262 }, { "epoch": 0.7415287072912997, "grad_norm": 0.937456847249902, "learning_rate": 3.302817557066855e-06, "loss": 0.8458, "step": 5263 }, { "epoch": 0.7416696019725255, "grad_norm": 1.2990250044775917, "learning_rate": 3.29942924419718e-06, "loss": 0.4622, "step": 5264 }, { "epoch": 0.7418104966537513, "grad_norm": 0.9751206575582113, "learning_rate": 3.296042326827408e-06, "loss": 0.8337, "step": 5265 }, { "epoch": 0.7419513913349771, "grad_norm": 1.0421657146595258, "learning_rate": 3.2926568056629292e-06, "loss": 0.8529, "step": 5266 }, { "epoch": 0.7420922860162029, "grad_norm": 1.1369087042860253, "learning_rate": 3.2892726814088195e-06, "loss": 0.37, "step": 5267 }, { "epoch": 0.7422331806974287, "grad_norm": 0.9707517675962002, "learning_rate": 3.2858899547698864e-06, "loss": 0.7921, "step": 5268 }, { "epoch": 0.7423740753786544, "grad_norm": 0.9783671450993152, "learning_rate": 3.282508626450628e-06, "loss": 0.8309, "step": 5269 }, { "epoch": 0.7425149700598802, "grad_norm": 1.028130783820625, "learning_rate": 3.279128697155267e-06, "loss": 0.845, "step": 5270 }, { "epoch": 0.742655864741106, "grad_norm": 0.9894825354129462, "learning_rate": 3.2757501675877246e-06, "loss": 0.8656, "step": 5271 }, { "epoch": 0.7427967594223318, "grad_norm": 1.3099586342598686, "learning_rate": 3.272373038451633e-06, "loss": 0.476, "step": 5272 }, { "epoch": 0.7429376541035576, "grad_norm": 1.1531853562636232, "learning_rate": 3.268997310450327e-06, "loss": 0.8767, "step": 5273 }, { "epoch": 0.7430785487847834, "grad_norm": 1.1141471826416434, "learning_rate": 3.2656229842868647e-06, "loss": 0.8326, "step": 5274 }, { "epoch": 0.7432194434660092, "grad_norm": 0.9670497670889924, "learning_rate": 3.262250060663996e-06, "loss": 0.8551, "step": 5275 }, { "epoch": 0.7433603381472349, "grad_norm": 0.9749739760474715, "learning_rate": 3.258878540284194e-06, "loss": 0.8877, "step": 5276 }, { "epoch": 0.7435012328284607, "grad_norm": 0.9718614310991941, "learning_rate": 3.255508423849619e-06, "loss": 0.8328, "step": 5277 }, { "epoch": 0.7436421275096865, "grad_norm": 1.2608884207154414, "learning_rate": 3.2521397120621623e-06, "loss": 0.5731, "step": 5278 }, { "epoch": 0.7437830221909123, "grad_norm": 1.1550256592034978, "learning_rate": 3.2487724056234026e-06, "loss": 0.8079, "step": 5279 }, { "epoch": 0.7439239168721381, "grad_norm": 1.0984473202835139, "learning_rate": 3.2454065052346418e-06, "loss": 0.8467, "step": 5280 }, { "epoch": 0.7440648115533639, "grad_norm": 1.0984738525782045, "learning_rate": 3.2420420115968796e-06, "loss": 0.4008, "step": 5281 }, { "epoch": 0.7442057062345896, "grad_norm": 1.1243004923439284, "learning_rate": 3.2386789254108255e-06, "loss": 0.8433, "step": 5282 }, { "epoch": 0.7443466009158154, "grad_norm": 1.2003868023647546, "learning_rate": 3.235317247376889e-06, "loss": 0.8434, "step": 5283 }, { "epoch": 0.7444874955970412, "grad_norm": 0.9680368614998778, "learning_rate": 3.231956978195203e-06, "loss": 0.8153, "step": 5284 }, { "epoch": 0.744628390278267, "grad_norm": 1.0477594199239566, "learning_rate": 3.228598118565588e-06, "loss": 0.8357, "step": 5285 }, { "epoch": 0.7447692849594928, "grad_norm": 0.9510370451126517, "learning_rate": 3.2252406691875904e-06, "loss": 0.8333, "step": 5286 }, { "epoch": 0.7449101796407186, "grad_norm": 1.0542070659734135, "learning_rate": 3.221884630760439e-06, "loss": 0.8552, "step": 5287 }, { "epoch": 0.7450510743219444, "grad_norm": 1.1278622274305563, "learning_rate": 3.2185300039830913e-06, "loss": 0.8786, "step": 5288 }, { "epoch": 0.7451919690031701, "grad_norm": 1.2697386699958104, "learning_rate": 3.2151767895541964e-06, "loss": 0.5822, "step": 5289 }, { "epoch": 0.7453328636843959, "grad_norm": 1.3637201184420225, "learning_rate": 3.2118249881721187e-06, "loss": 0.4486, "step": 5290 }, { "epoch": 0.7454737583656217, "grad_norm": 1.3089891411482266, "learning_rate": 3.208474600534921e-06, "loss": 0.5612, "step": 5291 }, { "epoch": 0.7456146530468475, "grad_norm": 1.0736590655472105, "learning_rate": 3.2051256273403755e-06, "loss": 0.8789, "step": 5292 }, { "epoch": 0.7457555477280733, "grad_norm": 1.3303038016646922, "learning_rate": 3.201778069285958e-06, "loss": 0.5649, "step": 5293 }, { "epoch": 0.7458964424092991, "grad_norm": 0.9258248589540231, "learning_rate": 3.198431927068848e-06, "loss": 0.8618, "step": 5294 }, { "epoch": 0.7460373370905249, "grad_norm": 1.3014633925450272, "learning_rate": 3.195087201385938e-06, "loss": 0.5464, "step": 5295 }, { "epoch": 0.7461782317717506, "grad_norm": 1.0726893700168219, "learning_rate": 3.191743892933815e-06, "loss": 0.9182, "step": 5296 }, { "epoch": 0.7463191264529764, "grad_norm": 1.251770925963765, "learning_rate": 3.1884020024087846e-06, "loss": 0.5099, "step": 5297 }, { "epoch": 0.7464600211342022, "grad_norm": 0.9789957302246823, "learning_rate": 3.1850615305068368e-06, "loss": 0.8647, "step": 5298 }, { "epoch": 0.746600915815428, "grad_norm": 0.9975016974101548, "learning_rate": 3.1817224779236844e-06, "loss": 0.8425, "step": 5299 }, { "epoch": 0.7467418104966538, "grad_norm": 1.2659486201899581, "learning_rate": 3.178384845354735e-06, "loss": 0.5359, "step": 5300 }, { "epoch": 0.7468827051778796, "grad_norm": 1.042609438724479, "learning_rate": 3.175048633495108e-06, "loss": 0.8629, "step": 5301 }, { "epoch": 0.7470235998591053, "grad_norm": 0.9848690250229712, "learning_rate": 3.1717138430396187e-06, "loss": 0.8133, "step": 5302 }, { "epoch": 0.7471644945403311, "grad_norm": 1.0964983527717274, "learning_rate": 3.168380474682791e-06, "loss": 0.8669, "step": 5303 }, { "epoch": 0.7473053892215569, "grad_norm": 1.056510404679837, "learning_rate": 3.165048529118848e-06, "loss": 0.8167, "step": 5304 }, { "epoch": 0.7474462839027827, "grad_norm": 0.9062352267605107, "learning_rate": 3.1617180070417273e-06, "loss": 0.814, "step": 5305 }, { "epoch": 0.7475871785840085, "grad_norm": 0.9458110330494368, "learning_rate": 3.158388909145055e-06, "loss": 0.8023, "step": 5306 }, { "epoch": 0.7477280732652343, "grad_norm": 1.031317403057515, "learning_rate": 3.1550612361221786e-06, "loss": 0.8202, "step": 5307 }, { "epoch": 0.7478689679464601, "grad_norm": 0.9485211432410281, "learning_rate": 3.1517349886661276e-06, "loss": 0.8497, "step": 5308 }, { "epoch": 0.7480098626276858, "grad_norm": 1.1808835973413583, "learning_rate": 3.148410167469652e-06, "loss": 0.8292, "step": 5309 }, { "epoch": 0.7481507573089116, "grad_norm": 1.3439916949187516, "learning_rate": 3.1450867732251942e-06, "loss": 0.5223, "step": 5310 }, { "epoch": 0.7482916519901374, "grad_norm": 1.0063355059878405, "learning_rate": 3.1417648066249097e-06, "loss": 0.7847, "step": 5311 }, { "epoch": 0.7484325466713632, "grad_norm": 1.0600486227121904, "learning_rate": 3.1384442683606475e-06, "loss": 0.8671, "step": 5312 }, { "epoch": 0.748573441352589, "grad_norm": 1.2152945655894491, "learning_rate": 3.1351251591239617e-06, "loss": 0.4577, "step": 5313 }, { "epoch": 0.7487143360338148, "grad_norm": 1.0170105491076873, "learning_rate": 3.131807479606107e-06, "loss": 0.8673, "step": 5314 }, { "epoch": 0.7488552307150405, "grad_norm": 0.9695055721400377, "learning_rate": 3.1284912304980485e-06, "loss": 0.8052, "step": 5315 }, { "epoch": 0.7489961253962663, "grad_norm": 1.2548507843114707, "learning_rate": 3.1251764124904415e-06, "loss": 0.4919, "step": 5316 }, { "epoch": 0.749137020077492, "grad_norm": 1.350546582629187, "learning_rate": 3.121863026273658e-06, "loss": 0.5359, "step": 5317 }, { "epoch": 0.7492779147587179, "grad_norm": 1.2121398147356937, "learning_rate": 3.118551072537751e-06, "loss": 0.4797, "step": 5318 }, { "epoch": 0.7494188094399437, "grad_norm": 1.9618737708766723, "learning_rate": 3.1152405519724972e-06, "loss": 0.5332, "step": 5319 }, { "epoch": 0.7495597041211695, "grad_norm": 1.3598786040641304, "learning_rate": 3.111931465267357e-06, "loss": 0.4623, "step": 5320 }, { "epoch": 0.7497005988023953, "grad_norm": 1.28081794022375, "learning_rate": 3.1086238131115086e-06, "loss": 0.4958, "step": 5321 }, { "epoch": 0.749841493483621, "grad_norm": 1.1432147537911423, "learning_rate": 3.1053175961938185e-06, "loss": 0.8563, "step": 5322 }, { "epoch": 0.7499823881648467, "grad_norm": 1.1840267669406601, "learning_rate": 3.1020128152028594e-06, "loss": 0.4384, "step": 5323 }, { "epoch": 0.7501232828460725, "grad_norm": 1.1292822289383255, "learning_rate": 3.0987094708269006e-06, "loss": 0.8492, "step": 5324 }, { "epoch": 0.7502641775272983, "grad_norm": 1.1761895397586581, "learning_rate": 3.0954075637539216e-06, "loss": 0.8249, "step": 5325 }, { "epoch": 0.7504050722085241, "grad_norm": 1.0885026713909574, "learning_rate": 3.0921070946715914e-06, "loss": 0.8734, "step": 5326 }, { "epoch": 0.75054596688975, "grad_norm": 1.1085598769547518, "learning_rate": 3.0888080642672914e-06, "loss": 0.8457, "step": 5327 }, { "epoch": 0.7506868615709757, "grad_norm": 1.0799302734202092, "learning_rate": 3.0855104732280925e-06, "loss": 0.8657, "step": 5328 }, { "epoch": 0.7508277562522014, "grad_norm": 1.080122813132561, "learning_rate": 3.0822143222407708e-06, "loss": 0.8139, "step": 5329 }, { "epoch": 0.7509686509334272, "grad_norm": 1.011994800959488, "learning_rate": 3.0789196119917997e-06, "loss": 0.8345, "step": 5330 }, { "epoch": 0.751109545614653, "grad_norm": 1.2081681889463551, "learning_rate": 3.0756263431673595e-06, "loss": 0.5374, "step": 5331 }, { "epoch": 0.7512504402958788, "grad_norm": 0.8820246913010755, "learning_rate": 3.0723345164533236e-06, "loss": 0.8238, "step": 5332 }, { "epoch": 0.7513913349771046, "grad_norm": 1.0124868796524806, "learning_rate": 3.069044132535267e-06, "loss": 0.8224, "step": 5333 }, { "epoch": 0.7515322296583304, "grad_norm": 0.947407362601705, "learning_rate": 3.0657551920984596e-06, "loss": 0.8644, "step": 5334 }, { "epoch": 0.7516731243395561, "grad_norm": 1.3041206692847411, "learning_rate": 3.0624676958278834e-06, "loss": 0.4618, "step": 5335 }, { "epoch": 0.7518140190207819, "grad_norm": 1.2086678739652477, "learning_rate": 3.059181644408208e-06, "loss": 0.5719, "step": 5336 }, { "epoch": 0.7519549137020077, "grad_norm": 1.4582583005390661, "learning_rate": 3.0558970385238017e-06, "loss": 0.5307, "step": 5337 }, { "epoch": 0.7520958083832335, "grad_norm": 1.276957078370675, "learning_rate": 3.0526138788587457e-06, "loss": 0.5934, "step": 5338 }, { "epoch": 0.7522367030644593, "grad_norm": 1.0695554056090544, "learning_rate": 3.0493321660967966e-06, "loss": 0.9115, "step": 5339 }, { "epoch": 0.7523775977456851, "grad_norm": 0.9869604599212558, "learning_rate": 3.046051900921434e-06, "loss": 0.799, "step": 5340 }, { "epoch": 0.7525184924269109, "grad_norm": 1.3774206721712816, "learning_rate": 3.0427730840158163e-06, "loss": 0.4688, "step": 5341 }, { "epoch": 0.7526593871081366, "grad_norm": 1.0771979160797853, "learning_rate": 3.039495716062818e-06, "loss": 0.8549, "step": 5342 }, { "epoch": 0.7528002817893624, "grad_norm": 1.0726410653017469, "learning_rate": 3.0362197977449993e-06, "loss": 0.8582, "step": 5343 }, { "epoch": 0.7529411764705882, "grad_norm": 1.017786541474906, "learning_rate": 3.0329453297446197e-06, "loss": 0.8574, "step": 5344 }, { "epoch": 0.753082071151814, "grad_norm": 1.2502090782877804, "learning_rate": 3.0296723127436377e-06, "loss": 0.4395, "step": 5345 }, { "epoch": 0.7532229658330398, "grad_norm": 1.0211466415216872, "learning_rate": 3.026400747423718e-06, "loss": 0.8394, "step": 5346 }, { "epoch": 0.7533638605142656, "grad_norm": 1.0507525438269336, "learning_rate": 3.0231306344662072e-06, "loss": 0.8719, "step": 5347 }, { "epoch": 0.7535047551954913, "grad_norm": 1.0007895883690292, "learning_rate": 3.019861974552165e-06, "loss": 0.8507, "step": 5348 }, { "epoch": 0.7536456498767171, "grad_norm": 1.238589985710069, "learning_rate": 3.016594768362341e-06, "loss": 0.4809, "step": 5349 }, { "epoch": 0.7537865445579429, "grad_norm": 1.1005252653661912, "learning_rate": 3.013329016577179e-06, "loss": 0.7896, "step": 5350 }, { "epoch": 0.7539274392391687, "grad_norm": 1.288251487964592, "learning_rate": 3.010064719876823e-06, "loss": 0.4992, "step": 5351 }, { "epoch": 0.7540683339203945, "grad_norm": 1.2948862825660024, "learning_rate": 3.0068018789411192e-06, "loss": 0.4467, "step": 5352 }, { "epoch": 0.7542092286016203, "grad_norm": 1.0142532635680026, "learning_rate": 3.003540494449603e-06, "loss": 0.7975, "step": 5353 }, { "epoch": 0.7543501232828461, "grad_norm": 1.0623491739066138, "learning_rate": 3.0002805670815094e-06, "loss": 0.8434, "step": 5354 }, { "epoch": 0.7544910179640718, "grad_norm": 1.1181700179037068, "learning_rate": 2.997022097515767e-06, "loss": 0.8689, "step": 5355 }, { "epoch": 0.7546319126452976, "grad_norm": 0.8889205338521067, "learning_rate": 2.993765086431011e-06, "loss": 0.8326, "step": 5356 }, { "epoch": 0.7547728073265234, "grad_norm": 1.2840571999928265, "learning_rate": 2.9905095345055568e-06, "loss": 0.4618, "step": 5357 }, { "epoch": 0.7549137020077492, "grad_norm": 1.0072038905187208, "learning_rate": 2.9872554424174328e-06, "loss": 0.8671, "step": 5358 }, { "epoch": 0.755054596688975, "grad_norm": 1.1076161148668129, "learning_rate": 2.9840028108443508e-06, "loss": 0.9108, "step": 5359 }, { "epoch": 0.7551954913702008, "grad_norm": 1.053298610815456, "learning_rate": 2.9807516404637237e-06, "loss": 0.8755, "step": 5360 }, { "epoch": 0.7553363860514266, "grad_norm": 0.9309979143934857, "learning_rate": 2.9775019319526565e-06, "loss": 0.8378, "step": 5361 }, { "epoch": 0.7554772807326523, "grad_norm": 1.1389652286546326, "learning_rate": 2.974253685987957e-06, "loss": 0.4391, "step": 5362 }, { "epoch": 0.7556181754138781, "grad_norm": 1.171746149505366, "learning_rate": 2.971006903246122e-06, "loss": 0.4511, "step": 5363 }, { "epoch": 0.7557590700951039, "grad_norm": 0.9673431929742092, "learning_rate": 2.9677615844033446e-06, "loss": 0.8805, "step": 5364 }, { "epoch": 0.7558999647763297, "grad_norm": 1.24501484298646, "learning_rate": 2.964517730135511e-06, "loss": 0.513, "step": 5365 }, { "epoch": 0.7560408594575555, "grad_norm": 1.1462744856678124, "learning_rate": 2.9612753411182125e-06, "loss": 0.5281, "step": 5366 }, { "epoch": 0.7561817541387813, "grad_norm": 1.045671408819998, "learning_rate": 2.9580344180267197e-06, "loss": 0.826, "step": 5367 }, { "epoch": 0.756322648820007, "grad_norm": 1.05164162304337, "learning_rate": 2.9547949615360138e-06, "loss": 0.8876, "step": 5368 }, { "epoch": 0.7564635435012328, "grad_norm": 1.2567203117070649, "learning_rate": 2.951556972320759e-06, "loss": 0.484, "step": 5369 }, { "epoch": 0.7566044381824586, "grad_norm": 1.0495995517620826, "learning_rate": 2.9483204510553197e-06, "loss": 0.9066, "step": 5370 }, { "epoch": 0.7567453328636844, "grad_norm": 0.9840849870099667, "learning_rate": 2.945085398413746e-06, "loss": 0.8425, "step": 5371 }, { "epoch": 0.7568862275449102, "grad_norm": 0.885945661428144, "learning_rate": 2.9418518150697984e-06, "loss": 0.8227, "step": 5372 }, { "epoch": 0.757027122226136, "grad_norm": 1.353929603298168, "learning_rate": 2.9386197016969174e-06, "loss": 0.4835, "step": 5373 }, { "epoch": 0.7571680169073618, "grad_norm": 1.029053533859629, "learning_rate": 2.935389058968241e-06, "loss": 0.825, "step": 5374 }, { "epoch": 0.7573089115885875, "grad_norm": 0.9168497754656346, "learning_rate": 2.9321598875566005e-06, "loss": 0.7783, "step": 5375 }, { "epoch": 0.7574498062698133, "grad_norm": 1.0368908303240179, "learning_rate": 2.9289321881345257e-06, "loss": 0.8943, "step": 5376 }, { "epoch": 0.7575907009510391, "grad_norm": 0.9771715018139876, "learning_rate": 2.9257059613742345e-06, "loss": 0.8773, "step": 5377 }, { "epoch": 0.7577315956322649, "grad_norm": 0.9871187037648624, "learning_rate": 2.922481207947636e-06, "loss": 0.8498, "step": 5378 }, { "epoch": 0.7578724903134907, "grad_norm": 1.296364997868256, "learning_rate": 2.9192579285263433e-06, "loss": 0.5438, "step": 5379 }, { "epoch": 0.7580133849947165, "grad_norm": 1.1686820826849755, "learning_rate": 2.9160361237816526e-06, "loss": 0.4397, "step": 5380 }, { "epoch": 0.7581542796759423, "grad_norm": 0.9729091000972284, "learning_rate": 2.9128157943845536e-06, "loss": 0.849, "step": 5381 }, { "epoch": 0.758295174357168, "grad_norm": 1.234100208784816, "learning_rate": 2.9095969410057288e-06, "loss": 0.5465, "step": 5382 }, { "epoch": 0.7584360690383938, "grad_norm": 0.9988328375279661, "learning_rate": 2.906379564315561e-06, "loss": 0.8644, "step": 5383 }, { "epoch": 0.7585769637196196, "grad_norm": 1.0531341108170629, "learning_rate": 2.9031636649841188e-06, "loss": 0.8637, "step": 5384 }, { "epoch": 0.7587178584008454, "grad_norm": 1.0597389660473435, "learning_rate": 2.8999492436811627e-06, "loss": 0.8769, "step": 5385 }, { "epoch": 0.7588587530820712, "grad_norm": 1.252232126466801, "learning_rate": 2.8967363010761428e-06, "loss": 0.4554, "step": 5386 }, { "epoch": 0.758999647763297, "grad_norm": 1.0175082525516996, "learning_rate": 2.893524837838213e-06, "loss": 0.8132, "step": 5387 }, { "epoch": 0.7591405424445227, "grad_norm": 1.033530671327298, "learning_rate": 2.8903148546362047e-06, "loss": 0.8219, "step": 5388 }, { "epoch": 0.7592814371257485, "grad_norm": 0.8913375447955499, "learning_rate": 2.8871063521386534e-06, "loss": 0.8185, "step": 5389 }, { "epoch": 0.7594223318069743, "grad_norm": 1.052843341457946, "learning_rate": 2.8838993310137777e-06, "loss": 0.8324, "step": 5390 }, { "epoch": 0.7595632264882001, "grad_norm": 1.104056190747951, "learning_rate": 2.880693791929491e-06, "loss": 0.396, "step": 5391 }, { "epoch": 0.7597041211694259, "grad_norm": 1.083538242131362, "learning_rate": 2.8774897355533926e-06, "loss": 0.8906, "step": 5392 }, { "epoch": 0.7598450158506517, "grad_norm": 1.5078268546207003, "learning_rate": 2.874287162552787e-06, "loss": 0.5133, "step": 5393 }, { "epoch": 0.7599859105318775, "grad_norm": 1.8413340122440056, "learning_rate": 2.8710860735946554e-06, "loss": 0.5987, "step": 5394 }, { "epoch": 0.7601268052131032, "grad_norm": 1.0716804713858343, "learning_rate": 2.8678864693456776e-06, "loss": 0.8799, "step": 5395 }, { "epoch": 0.760267699894329, "grad_norm": 1.1868550996769953, "learning_rate": 2.8646883504722155e-06, "loss": 0.5481, "step": 5396 }, { "epoch": 0.7604085945755548, "grad_norm": 1.2238184907418161, "learning_rate": 2.8614917176403365e-06, "loss": 0.5701, "step": 5397 }, { "epoch": 0.7605494892567806, "grad_norm": 0.9884309062862529, "learning_rate": 2.8582965715157827e-06, "loss": 0.8612, "step": 5398 }, { "epoch": 0.7606903839380064, "grad_norm": 0.9677284270089285, "learning_rate": 2.855102912764001e-06, "loss": 0.8564, "step": 5399 }, { "epoch": 0.7608312786192322, "grad_norm": 1.3932804212747592, "learning_rate": 2.8519107420501168e-06, "loss": 0.4234, "step": 5400 }, { "epoch": 0.7609721733004579, "grad_norm": 1.2967358375972768, "learning_rate": 2.848720060038952e-06, "loss": 0.4964, "step": 5401 }, { "epoch": 0.7611130679816837, "grad_norm": 1.2630156578686762, "learning_rate": 2.845530867395012e-06, "loss": 0.5499, "step": 5402 }, { "epoch": 0.7612539626629095, "grad_norm": 1.0711400935504451, "learning_rate": 2.842343164782504e-06, "loss": 0.7871, "step": 5403 }, { "epoch": 0.7613948573441353, "grad_norm": 0.9421127378123467, "learning_rate": 2.839156952865314e-06, "loss": 0.8247, "step": 5404 }, { "epoch": 0.7615357520253611, "grad_norm": 0.9091724517653654, "learning_rate": 2.8359722323070203e-06, "loss": 0.7745, "step": 5405 }, { "epoch": 0.7616766467065869, "grad_norm": 1.045271017331252, "learning_rate": 2.8327890037708896e-06, "loss": 0.831, "step": 5406 }, { "epoch": 0.7618175413878127, "grad_norm": 1.2287547607862956, "learning_rate": 2.829607267919884e-06, "loss": 0.471, "step": 5407 }, { "epoch": 0.7619584360690383, "grad_norm": 0.9450462301915855, "learning_rate": 2.826427025416646e-06, "loss": 0.8442, "step": 5408 }, { "epoch": 0.7620993307502641, "grad_norm": 1.0445912318659594, "learning_rate": 2.823248276923517e-06, "loss": 0.8021, "step": 5409 }, { "epoch": 0.76224022543149, "grad_norm": 1.0095012775569663, "learning_rate": 2.8200710231025184e-06, "loss": 0.8397, "step": 5410 }, { "epoch": 0.7623811201127157, "grad_norm": 1.2153009340825291, "learning_rate": 2.8168952646153634e-06, "loss": 0.6403, "step": 5411 }, { "epoch": 0.7625220147939415, "grad_norm": 1.0651476171541348, "learning_rate": 2.8137210021234517e-06, "loss": 0.8398, "step": 5412 }, { "epoch": 0.7626629094751673, "grad_norm": 1.310453407318134, "learning_rate": 2.810548236287879e-06, "loss": 0.5421, "step": 5413 }, { "epoch": 0.7628038041563932, "grad_norm": 1.081818750709734, "learning_rate": 2.8073769677694185e-06, "loss": 0.8358, "step": 5414 }, { "epoch": 0.7629446988376188, "grad_norm": 1.0500779258547088, "learning_rate": 2.8042071972285455e-06, "loss": 0.8471, "step": 5415 }, { "epoch": 0.7630855935188446, "grad_norm": 1.264579258898312, "learning_rate": 2.8010389253254034e-06, "loss": 0.6424, "step": 5416 }, { "epoch": 0.7632264882000704, "grad_norm": 1.0058843400061792, "learning_rate": 2.797872152719844e-06, "loss": 0.8205, "step": 5417 }, { "epoch": 0.7633673828812962, "grad_norm": 1.2538819203770106, "learning_rate": 2.794706880071394e-06, "loss": 0.4624, "step": 5418 }, { "epoch": 0.763508277562522, "grad_norm": 1.2361843818902019, "learning_rate": 2.7915431080392684e-06, "loss": 0.5526, "step": 5419 }, { "epoch": 0.7636491722437478, "grad_norm": 0.9853356973644479, "learning_rate": 2.7883808372823806e-06, "loss": 0.825, "step": 5420 }, { "epoch": 0.7637900669249735, "grad_norm": 1.181537332781588, "learning_rate": 2.78522006845932e-06, "loss": 0.8963, "step": 5421 }, { "epoch": 0.7639309616061993, "grad_norm": 1.3956954792139824, "learning_rate": 2.7820608022283647e-06, "loss": 0.5688, "step": 5422 }, { "epoch": 0.7640718562874251, "grad_norm": 1.1397082037922208, "learning_rate": 2.7789030392474813e-06, "loss": 0.8703, "step": 5423 }, { "epoch": 0.7642127509686509, "grad_norm": 1.1617855932690162, "learning_rate": 2.775746780174329e-06, "loss": 0.9104, "step": 5424 }, { "epoch": 0.7643536456498767, "grad_norm": 0.9304418394257022, "learning_rate": 2.7725920256662453e-06, "loss": 0.8895, "step": 5425 }, { "epoch": 0.7644945403311025, "grad_norm": 1.0557843991498124, "learning_rate": 2.7694387763802587e-06, "loss": 0.8456, "step": 5426 }, { "epoch": 0.7646354350123283, "grad_norm": 1.0248110822752319, "learning_rate": 2.7662870329730783e-06, "loss": 0.8141, "step": 5427 }, { "epoch": 0.764776329693554, "grad_norm": 0.9530217308717357, "learning_rate": 2.7631367961011136e-06, "loss": 0.8429, "step": 5428 }, { "epoch": 0.7649172243747798, "grad_norm": 1.0165919178613603, "learning_rate": 2.7599880664204425e-06, "loss": 0.8302, "step": 5429 }, { "epoch": 0.7650581190560056, "grad_norm": 1.0764878641567068, "learning_rate": 2.7568408445868455e-06, "loss": 0.8506, "step": 5430 }, { "epoch": 0.7651990137372314, "grad_norm": 1.1771372253989698, "learning_rate": 2.753695131255778e-06, "loss": 0.8002, "step": 5431 }, { "epoch": 0.7653399084184572, "grad_norm": 1.064915377694025, "learning_rate": 2.7505509270823836e-06, "loss": 0.8715, "step": 5432 }, { "epoch": 0.765480803099683, "grad_norm": 0.9412121611683661, "learning_rate": 2.747408232721491e-06, "loss": 0.8519, "step": 5433 }, { "epoch": 0.7656216977809087, "grad_norm": 1.2314161203488798, "learning_rate": 2.744267048827621e-06, "loss": 0.4364, "step": 5434 }, { "epoch": 0.7657625924621345, "grad_norm": 0.9705325093926702, "learning_rate": 2.7411273760549696e-06, "loss": 0.8738, "step": 5435 }, { "epoch": 0.7659034871433603, "grad_norm": 1.02094468194862, "learning_rate": 2.7379892150574316e-06, "loss": 0.7957, "step": 5436 }, { "epoch": 0.7660443818245861, "grad_norm": 1.211232018602319, "learning_rate": 2.7348525664885673e-06, "loss": 0.5477, "step": 5437 }, { "epoch": 0.7661852765058119, "grad_norm": 1.0711422524147938, "learning_rate": 2.7317174310016427e-06, "loss": 0.9002, "step": 5438 }, { "epoch": 0.7663261711870377, "grad_norm": 1.031046695873207, "learning_rate": 2.728583809249592e-06, "loss": 0.8628, "step": 5439 }, { "epoch": 0.7664670658682635, "grad_norm": 1.2785835949402002, "learning_rate": 2.725451701885049e-06, "loss": 0.4626, "step": 5440 }, { "epoch": 0.7666079605494892, "grad_norm": 0.9669906512559232, "learning_rate": 2.7223211095603208e-06, "loss": 0.8423, "step": 5441 }, { "epoch": 0.766748855230715, "grad_norm": 1.2565273042571812, "learning_rate": 2.7191920329274037e-06, "loss": 0.5253, "step": 5442 }, { "epoch": 0.7668897499119408, "grad_norm": 1.1717437914325943, "learning_rate": 2.7160644726379725e-06, "loss": 0.5047, "step": 5443 }, { "epoch": 0.7670306445931666, "grad_norm": 1.2678763391027317, "learning_rate": 2.7129384293433993e-06, "loss": 0.4726, "step": 5444 }, { "epoch": 0.7671715392743924, "grad_norm": 1.0808941977494402, "learning_rate": 2.7098139036947246e-06, "loss": 0.8666, "step": 5445 }, { "epoch": 0.7673124339556182, "grad_norm": 1.0621060205421178, "learning_rate": 2.7066908963426908e-06, "loss": 0.8685, "step": 5446 }, { "epoch": 0.767453328636844, "grad_norm": 1.0137955884713992, "learning_rate": 2.7035694079376997e-06, "loss": 0.8207, "step": 5447 }, { "epoch": 0.7675942233180697, "grad_norm": 1.2090966874763973, "learning_rate": 2.7004494391298606e-06, "loss": 0.8781, "step": 5448 }, { "epoch": 0.7677351179992955, "grad_norm": 1.2130490826319864, "learning_rate": 2.6973309905689516e-06, "loss": 0.4825, "step": 5449 }, { "epoch": 0.7678760126805213, "grad_norm": 0.9981483813863864, "learning_rate": 2.6942140629044435e-06, "loss": 0.8222, "step": 5450 }, { "epoch": 0.7680169073617471, "grad_norm": 1.031833321550168, "learning_rate": 2.6910986567854826e-06, "loss": 0.7974, "step": 5451 }, { "epoch": 0.7681578020429729, "grad_norm": 1.055647725225438, "learning_rate": 2.687984772860902e-06, "loss": 0.8412, "step": 5452 }, { "epoch": 0.7682986967241987, "grad_norm": 1.2217475456256066, "learning_rate": 2.6848724117792147e-06, "loss": 0.4871, "step": 5453 }, { "epoch": 0.7684395914054244, "grad_norm": 1.0108460504889927, "learning_rate": 2.681761574188625e-06, "loss": 0.8054, "step": 5454 }, { "epoch": 0.7685804860866502, "grad_norm": 1.1304150869405818, "learning_rate": 2.678652260737008e-06, "loss": 0.5697, "step": 5455 }, { "epoch": 0.768721380767876, "grad_norm": 1.4325491609519756, "learning_rate": 2.675544472071936e-06, "loss": 0.4986, "step": 5456 }, { "epoch": 0.7688622754491018, "grad_norm": 1.0923355254945841, "learning_rate": 2.6724382088406443e-06, "loss": 0.484, "step": 5457 }, { "epoch": 0.7690031701303276, "grad_norm": 1.077968450587212, "learning_rate": 2.669333471690069e-06, "loss": 0.8399, "step": 5458 }, { "epoch": 0.7691440648115534, "grad_norm": 1.4028131117834366, "learning_rate": 2.6662302612668145e-06, "loss": 0.4583, "step": 5459 }, { "epoch": 0.7692849594927792, "grad_norm": 0.9971435124107684, "learning_rate": 2.663128578217181e-06, "loss": 0.8015, "step": 5460 }, { "epoch": 0.7694258541740049, "grad_norm": 1.0927587726511145, "learning_rate": 2.66002842318714e-06, "loss": 0.8731, "step": 5461 }, { "epoch": 0.7695667488552307, "grad_norm": 1.407067061675532, "learning_rate": 2.656929796822346e-06, "loss": 0.5639, "step": 5462 }, { "epoch": 0.7697076435364565, "grad_norm": 0.951902590521167, "learning_rate": 2.6538326997681395e-06, "loss": 0.8196, "step": 5463 }, { "epoch": 0.7698485382176823, "grad_norm": 1.1361632471050371, "learning_rate": 2.6507371326695354e-06, "loss": 0.9013, "step": 5464 }, { "epoch": 0.7699894328989081, "grad_norm": 1.393523502532471, "learning_rate": 2.6476430961712395e-06, "loss": 0.484, "step": 5465 }, { "epoch": 0.7701303275801339, "grad_norm": 1.0320792048317238, "learning_rate": 2.6445505909176304e-06, "loss": 0.8582, "step": 5466 }, { "epoch": 0.7702712222613596, "grad_norm": 0.9765077700481795, "learning_rate": 2.6414596175527796e-06, "loss": 0.8472, "step": 5467 }, { "epoch": 0.7704121169425854, "grad_norm": 1.102184114735619, "learning_rate": 2.6383701767204183e-06, "loss": 0.8665, "step": 5468 }, { "epoch": 0.7705530116238112, "grad_norm": 0.9109989341806749, "learning_rate": 2.63528226906398e-06, "loss": 0.8168, "step": 5469 }, { "epoch": 0.770693906305037, "grad_norm": 1.1307209372457148, "learning_rate": 2.632195895226567e-06, "loss": 0.7971, "step": 5470 }, { "epoch": 0.7708348009862628, "grad_norm": 1.2341959563841127, "learning_rate": 2.629111055850968e-06, "loss": 0.8092, "step": 5471 }, { "epoch": 0.7709756956674886, "grad_norm": 1.2653486415189337, "learning_rate": 2.6260277515796495e-06, "loss": 0.4589, "step": 5472 }, { "epoch": 0.7711165903487144, "grad_norm": 1.0798571192614708, "learning_rate": 2.6229459830547564e-06, "loss": 0.8412, "step": 5473 }, { "epoch": 0.7712574850299401, "grad_norm": 0.9732085959472819, "learning_rate": 2.6198657509181136e-06, "loss": 0.8983, "step": 5474 }, { "epoch": 0.7713983797111659, "grad_norm": 1.0427649112845438, "learning_rate": 2.616787055811235e-06, "loss": 0.8401, "step": 5475 }, { "epoch": 0.7715392743923917, "grad_norm": 1.2294743142909197, "learning_rate": 2.6137098983753006e-06, "loss": 0.4576, "step": 5476 }, { "epoch": 0.7716801690736175, "grad_norm": 1.2026200073237665, "learning_rate": 2.610634279251185e-06, "loss": 0.5522, "step": 5477 }, { "epoch": 0.7718210637548433, "grad_norm": 1.2800681192921837, "learning_rate": 2.6075601990794254e-06, "loss": 0.4984, "step": 5478 }, { "epoch": 0.7719619584360691, "grad_norm": 0.9976386094310024, "learning_rate": 2.6044876585002544e-06, "loss": 0.865, "step": 5479 }, { "epoch": 0.7721028531172949, "grad_norm": 1.035783369555657, "learning_rate": 2.601416658153573e-06, "loss": 0.8599, "step": 5480 }, { "epoch": 0.7722437477985206, "grad_norm": 1.048850764315569, "learning_rate": 2.598347198678969e-06, "loss": 0.8874, "step": 5481 }, { "epoch": 0.7723846424797464, "grad_norm": 0.9478404125441796, "learning_rate": 2.5952792807157057e-06, "loss": 0.889, "step": 5482 }, { "epoch": 0.7725255371609722, "grad_norm": 0.9978009630259871, "learning_rate": 2.5922129049027234e-06, "loss": 0.789, "step": 5483 }, { "epoch": 0.772666431842198, "grad_norm": 1.1844967671582394, "learning_rate": 2.589148071878642e-06, "loss": 0.8884, "step": 5484 }, { "epoch": 0.7728073265234238, "grad_norm": 0.9433852975101694, "learning_rate": 2.5860847822817657e-06, "loss": 0.7894, "step": 5485 }, { "epoch": 0.7729482212046496, "grad_norm": 0.9807527716016747, "learning_rate": 2.58302303675007e-06, "loss": 0.8033, "step": 5486 }, { "epoch": 0.7730891158858753, "grad_norm": 0.9519247540509914, "learning_rate": 2.579962835921218e-06, "loss": 0.83, "step": 5487 }, { "epoch": 0.7732300105671011, "grad_norm": 1.0097795674500132, "learning_rate": 2.5769041804325333e-06, "loss": 0.8435, "step": 5488 }, { "epoch": 0.7733709052483269, "grad_norm": 0.8924823024428462, "learning_rate": 2.5738470709210404e-06, "loss": 0.8147, "step": 5489 }, { "epoch": 0.7735117999295527, "grad_norm": 1.2693808282809826, "learning_rate": 2.5707915080234223e-06, "loss": 0.4433, "step": 5490 }, { "epoch": 0.7736526946107785, "grad_norm": 1.0736945752327307, "learning_rate": 2.5677374923760556e-06, "loss": 0.8274, "step": 5491 }, { "epoch": 0.7737935892920043, "grad_norm": 1.2187080652858482, "learning_rate": 2.5646850246149845e-06, "loss": 0.4729, "step": 5492 }, { "epoch": 0.7739344839732301, "grad_norm": 1.1182913806794699, "learning_rate": 2.5616341053759332e-06, "loss": 0.5554, "step": 5493 }, { "epoch": 0.7740753786544557, "grad_norm": 1.5213777798690602, "learning_rate": 2.5585847352943018e-06, "loss": 0.515, "step": 5494 }, { "epoch": 0.7742162733356815, "grad_norm": 1.1627312183620828, "learning_rate": 2.5555369150051746e-06, "loss": 0.4043, "step": 5495 }, { "epoch": 0.7743571680169073, "grad_norm": 0.9793745292852526, "learning_rate": 2.552490645143302e-06, "loss": 0.8153, "step": 5496 }, { "epoch": 0.7744980626981331, "grad_norm": 0.9980623552136163, "learning_rate": 2.549445926343126e-06, "loss": 0.8116, "step": 5497 }, { "epoch": 0.774638957379359, "grad_norm": 0.999169924836065, "learning_rate": 2.546402759238753e-06, "loss": 0.8439, "step": 5498 }, { "epoch": 0.7747798520605848, "grad_norm": 1.0111446049986763, "learning_rate": 2.5433611444639718e-06, "loss": 0.8612, "step": 5499 }, { "epoch": 0.7749207467418106, "grad_norm": 1.0643377282259145, "learning_rate": 2.5403210826522417e-06, "loss": 0.8107, "step": 5500 }, { "epoch": 0.7750616414230362, "grad_norm": 1.274979125181717, "learning_rate": 2.5372825744367123e-06, "loss": 0.4993, "step": 5501 }, { "epoch": 0.775202536104262, "grad_norm": 1.0128352190487044, "learning_rate": 2.5342456204501964e-06, "loss": 0.8421, "step": 5502 }, { "epoch": 0.7753434307854878, "grad_norm": 1.027261518861693, "learning_rate": 2.5312102213251888e-06, "loss": 0.789, "step": 5503 }, { "epoch": 0.7754843254667136, "grad_norm": 1.0972041357448814, "learning_rate": 2.528176377693857e-06, "loss": 0.8479, "step": 5504 }, { "epoch": 0.7756252201479394, "grad_norm": 1.254714968354944, "learning_rate": 2.5251440901880475e-06, "loss": 0.4681, "step": 5505 }, { "epoch": 0.7757661148291652, "grad_norm": 1.120791769682057, "learning_rate": 2.5221133594392855e-06, "loss": 0.4594, "step": 5506 }, { "epoch": 0.7759070095103909, "grad_norm": 0.9375765618944325, "learning_rate": 2.519084186078764e-06, "loss": 0.8128, "step": 5507 }, { "epoch": 0.7760479041916167, "grad_norm": 1.0400250164922424, "learning_rate": 2.516056570737365e-06, "loss": 0.8552, "step": 5508 }, { "epoch": 0.7761887988728425, "grad_norm": 1.1538144066688445, "learning_rate": 2.5130305140456244e-06, "loss": 0.4458, "step": 5509 }, { "epoch": 0.7763296935540683, "grad_norm": 1.1301627038320314, "learning_rate": 2.510006016633777e-06, "loss": 0.4078, "step": 5510 }, { "epoch": 0.7764705882352941, "grad_norm": 1.068843215676458, "learning_rate": 2.506983079131714e-06, "loss": 0.8753, "step": 5511 }, { "epoch": 0.7766114829165199, "grad_norm": 1.0998779860052426, "learning_rate": 2.503961702169019e-06, "loss": 0.8345, "step": 5512 }, { "epoch": 0.7767523775977457, "grad_norm": 1.1880130183430795, "learning_rate": 2.500941886374936e-06, "loss": 0.5278, "step": 5513 }, { "epoch": 0.7768932722789714, "grad_norm": 1.287672663616128, "learning_rate": 2.497923632378391e-06, "loss": 0.5808, "step": 5514 }, { "epoch": 0.7770341669601972, "grad_norm": 1.23382514717441, "learning_rate": 2.4949069408079786e-06, "loss": 0.5482, "step": 5515 }, { "epoch": 0.777175061641423, "grad_norm": 1.3037899867605696, "learning_rate": 2.49189181229198e-06, "loss": 0.4835, "step": 5516 }, { "epoch": 0.7773159563226488, "grad_norm": 0.9552754707965139, "learning_rate": 2.4888782474583363e-06, "loss": 0.8306, "step": 5517 }, { "epoch": 0.7774568510038746, "grad_norm": 0.9686239561836091, "learning_rate": 2.485866246934677e-06, "loss": 0.832, "step": 5518 }, { "epoch": 0.7775977456851004, "grad_norm": 1.3124875308992205, "learning_rate": 2.482855811348296e-06, "loss": 0.5913, "step": 5519 }, { "epoch": 0.7777386403663261, "grad_norm": 0.9757042104753628, "learning_rate": 2.479846941326164e-06, "loss": 0.8613, "step": 5520 }, { "epoch": 0.7778795350475519, "grad_norm": 1.2437571667798841, "learning_rate": 2.476839637494922e-06, "loss": 0.4119, "step": 5521 }, { "epoch": 0.7780204297287777, "grad_norm": 0.9847130232209605, "learning_rate": 2.4738339004808954e-06, "loss": 0.8096, "step": 5522 }, { "epoch": 0.7781613244100035, "grad_norm": 1.022865820891565, "learning_rate": 2.4708297309100727e-06, "loss": 0.8089, "step": 5523 }, { "epoch": 0.7783022190912293, "grad_norm": 0.9275866520780184, "learning_rate": 2.4678271294081203e-06, "loss": 0.8148, "step": 5524 }, { "epoch": 0.7784431137724551, "grad_norm": 1.0531785787619987, "learning_rate": 2.4648260966003747e-06, "loss": 0.889, "step": 5525 }, { "epoch": 0.7785840084536809, "grad_norm": 0.948668215677554, "learning_rate": 2.4618266331118548e-06, "loss": 0.837, "step": 5526 }, { "epoch": 0.7787249031349066, "grad_norm": 0.9541782989321971, "learning_rate": 2.4588287395672396e-06, "loss": 0.8838, "step": 5527 }, { "epoch": 0.7788657978161324, "grad_norm": 0.8998659711141422, "learning_rate": 2.455832416590894e-06, "loss": 0.8478, "step": 5528 }, { "epoch": 0.7790066924973582, "grad_norm": 1.1756580737841043, "learning_rate": 2.452837664806846e-06, "loss": 0.4651, "step": 5529 }, { "epoch": 0.779147587178584, "grad_norm": 1.0796186789010633, "learning_rate": 2.4498444848388026e-06, "loss": 0.9149, "step": 5530 }, { "epoch": 0.7792884818598098, "grad_norm": 1.268487949257062, "learning_rate": 2.446852877310135e-06, "loss": 0.8735, "step": 5531 }, { "epoch": 0.7794293765410356, "grad_norm": 1.0613427503495103, "learning_rate": 2.4438628428438995e-06, "loss": 0.8154, "step": 5532 }, { "epoch": 0.7795702712222614, "grad_norm": 1.059687718943051, "learning_rate": 2.4408743820628166e-06, "loss": 0.8451, "step": 5533 }, { "epoch": 0.7797111659034871, "grad_norm": 0.941013034604898, "learning_rate": 2.4378874955892783e-06, "loss": 0.8269, "step": 5534 }, { "epoch": 0.7798520605847129, "grad_norm": 0.9798111747486002, "learning_rate": 2.43490218404535e-06, "loss": 0.8369, "step": 5535 }, { "epoch": 0.7799929552659387, "grad_norm": 1.2313534745408312, "learning_rate": 2.4319184480527756e-06, "loss": 0.5389, "step": 5536 }, { "epoch": 0.7801338499471645, "grad_norm": 1.2184422866823064, "learning_rate": 2.4289362882329583e-06, "loss": 0.4391, "step": 5537 }, { "epoch": 0.7802747446283903, "grad_norm": 1.18852482588681, "learning_rate": 2.425955705206987e-06, "loss": 0.4395, "step": 5538 }, { "epoch": 0.7804156393096161, "grad_norm": 1.0350165709236447, "learning_rate": 2.4229766995956126e-06, "loss": 0.903, "step": 5539 }, { "epoch": 0.7805565339908418, "grad_norm": 0.9205510637070092, "learning_rate": 2.419999272019259e-06, "loss": 0.8551, "step": 5540 }, { "epoch": 0.7806974286720676, "grad_norm": 1.20296578156603, "learning_rate": 2.4170234230980206e-06, "loss": 0.8673, "step": 5541 }, { "epoch": 0.7808383233532934, "grad_norm": 1.2131071174761037, "learning_rate": 2.4140491534516697e-06, "loss": 0.6128, "step": 5542 }, { "epoch": 0.7809792180345192, "grad_norm": 1.0165539009248978, "learning_rate": 2.411076463699644e-06, "loss": 0.8139, "step": 5543 }, { "epoch": 0.781120112715745, "grad_norm": 1.0693476051837572, "learning_rate": 2.4081053544610523e-06, "loss": 0.8203, "step": 5544 }, { "epoch": 0.7812610073969708, "grad_norm": 1.1548968169647955, "learning_rate": 2.405135826354674e-06, "loss": 0.8832, "step": 5545 }, { "epoch": 0.7814019020781966, "grad_norm": 1.2819073626359225, "learning_rate": 2.40216787999896e-06, "loss": 0.4552, "step": 5546 }, { "epoch": 0.7815427967594223, "grad_norm": 0.9737160331002961, "learning_rate": 2.3992015160120353e-06, "loss": 0.8216, "step": 5547 }, { "epoch": 0.7816836914406481, "grad_norm": 1.2673780151185974, "learning_rate": 2.3962367350116878e-06, "loss": 0.4373, "step": 5548 }, { "epoch": 0.7818245861218739, "grad_norm": 0.9447824473402812, "learning_rate": 2.3932735376153847e-06, "loss": 0.8339, "step": 5549 }, { "epoch": 0.7819654808030997, "grad_norm": 0.984476355828548, "learning_rate": 2.390311924440257e-06, "loss": 0.8701, "step": 5550 }, { "epoch": 0.7821063754843255, "grad_norm": 1.380130038895838, "learning_rate": 2.3873518961031083e-06, "loss": 0.5349, "step": 5551 }, { "epoch": 0.7822472701655513, "grad_norm": 0.9779303220079641, "learning_rate": 2.384393453220406e-06, "loss": 0.816, "step": 5552 }, { "epoch": 0.782388164846777, "grad_norm": 1.4614468949347061, "learning_rate": 2.3814365964083e-06, "loss": 0.474, "step": 5553 }, { "epoch": 0.7825290595280028, "grad_norm": 1.0462297552252635, "learning_rate": 2.3784813262826003e-06, "loss": 0.909, "step": 5554 }, { "epoch": 0.7826699542092286, "grad_norm": 0.9609496502980414, "learning_rate": 2.375527643458786e-06, "loss": 0.8172, "step": 5555 }, { "epoch": 0.7828108488904544, "grad_norm": 1.0332893664156708, "learning_rate": 2.3725755485520086e-06, "loss": 0.8693, "step": 5556 }, { "epoch": 0.7829517435716802, "grad_norm": 0.9601902171392588, "learning_rate": 2.369625042177092e-06, "loss": 0.7698, "step": 5557 }, { "epoch": 0.783092638252906, "grad_norm": 1.2888767568988586, "learning_rate": 2.3666761249485214e-06, "loss": 0.453, "step": 5558 }, { "epoch": 0.7832335329341318, "grad_norm": 1.073254786495645, "learning_rate": 2.3637287974804613e-06, "loss": 0.8438, "step": 5559 }, { "epoch": 0.7833744276153575, "grad_norm": 1.31142077472663, "learning_rate": 2.3607830603867356e-06, "loss": 0.5832, "step": 5560 }, { "epoch": 0.7835153222965833, "grad_norm": 0.9553799414382508, "learning_rate": 2.35783891428084e-06, "loss": 0.8075, "step": 5561 }, { "epoch": 0.7836562169778091, "grad_norm": 1.3296609207909553, "learning_rate": 2.354896359775939e-06, "loss": 0.4932, "step": 5562 }, { "epoch": 0.7837971116590349, "grad_norm": 1.0872953567792145, "learning_rate": 2.3519553974848696e-06, "loss": 0.8362, "step": 5563 }, { "epoch": 0.7839380063402607, "grad_norm": 0.9928385444213508, "learning_rate": 2.3490160280201323e-06, "loss": 0.7526, "step": 5564 }, { "epoch": 0.7840789010214865, "grad_norm": 0.9083326031924994, "learning_rate": 2.3460782519938973e-06, "loss": 0.7867, "step": 5565 }, { "epoch": 0.7842197957027123, "grad_norm": 0.9977207301646471, "learning_rate": 2.3431420700179994e-06, "loss": 0.8064, "step": 5566 }, { "epoch": 0.784360690383938, "grad_norm": 1.108324395872844, "learning_rate": 2.340207482703951e-06, "loss": 0.8453, "step": 5567 }, { "epoch": 0.7845015850651638, "grad_norm": 1.2586420411612218, "learning_rate": 2.33727449066292e-06, "loss": 0.532, "step": 5568 }, { "epoch": 0.7846424797463896, "grad_norm": 0.9896670341367262, "learning_rate": 2.3343430945057554e-06, "loss": 0.8425, "step": 5569 }, { "epoch": 0.7847833744276154, "grad_norm": 0.9791050683749423, "learning_rate": 2.3314132948429625e-06, "loss": 0.8312, "step": 5570 }, { "epoch": 0.7849242691088412, "grad_norm": 0.8542550784400865, "learning_rate": 2.328485092284719e-06, "loss": 0.8149, "step": 5571 }, { "epoch": 0.785065163790067, "grad_norm": 1.2092477815076164, "learning_rate": 2.3255584874408676e-06, "loss": 0.4501, "step": 5572 }, { "epoch": 0.7852060584712927, "grad_norm": 1.0199664132925952, "learning_rate": 2.322633480920925e-06, "loss": 0.8294, "step": 5573 }, { "epoch": 0.7853469531525185, "grad_norm": 1.0099958323764324, "learning_rate": 2.3197100733340673e-06, "loss": 0.8333, "step": 5574 }, { "epoch": 0.7854878478337443, "grad_norm": 1.2702210858132226, "learning_rate": 2.3167882652891403e-06, "loss": 0.578, "step": 5575 }, { "epoch": 0.7856287425149701, "grad_norm": 1.1305363622922735, "learning_rate": 2.313868057394654e-06, "loss": 0.4712, "step": 5576 }, { "epoch": 0.7857696371961959, "grad_norm": 0.9073232021563438, "learning_rate": 2.3109494502587925e-06, "loss": 0.7836, "step": 5577 }, { "epoch": 0.7859105318774217, "grad_norm": 0.904272165109917, "learning_rate": 2.308032444489399e-06, "loss": 0.8411, "step": 5578 }, { "epoch": 0.7860514265586475, "grad_norm": 1.1076343692578132, "learning_rate": 2.3051170406939882e-06, "loss": 0.4895, "step": 5579 }, { "epoch": 0.7861923212398731, "grad_norm": 1.1784844160633152, "learning_rate": 2.302203239479739e-06, "loss": 0.8826, "step": 5580 }, { "epoch": 0.786333215921099, "grad_norm": 1.0586798254752368, "learning_rate": 2.2992910414534965e-06, "loss": 0.8202, "step": 5581 }, { "epoch": 0.7864741106023247, "grad_norm": 1.4579273458375495, "learning_rate": 2.2963804472217677e-06, "loss": 0.5845, "step": 5582 }, { "epoch": 0.7866150052835506, "grad_norm": 1.1616341437290334, "learning_rate": 2.2934714573907357e-06, "loss": 0.8686, "step": 5583 }, { "epoch": 0.7867558999647764, "grad_norm": 1.31659066878237, "learning_rate": 2.2905640725662392e-06, "loss": 0.5125, "step": 5584 }, { "epoch": 0.7868967946460022, "grad_norm": 1.0127872390559187, "learning_rate": 2.287658293353795e-06, "loss": 0.8177, "step": 5585 }, { "epoch": 0.787037689327228, "grad_norm": 1.1783797760178893, "learning_rate": 2.28475412035857e-06, "loss": 0.5049, "step": 5586 }, { "epoch": 0.7871785840084536, "grad_norm": 1.1631358357357549, "learning_rate": 2.2818515541854023e-06, "loss": 0.5113, "step": 5587 }, { "epoch": 0.7873194786896794, "grad_norm": 1.0322637360175697, "learning_rate": 2.2789505954388047e-06, "loss": 0.8119, "step": 5588 }, { "epoch": 0.7874603733709052, "grad_norm": 1.4393697964241658, "learning_rate": 2.276051244722941e-06, "loss": 0.5564, "step": 5589 }, { "epoch": 0.787601268052131, "grad_norm": 1.2689196466542336, "learning_rate": 2.273153502641652e-06, "loss": 0.5636, "step": 5590 }, { "epoch": 0.7877421627333568, "grad_norm": 1.284579034469033, "learning_rate": 2.2702573697984378e-06, "loss": 0.5075, "step": 5591 }, { "epoch": 0.7878830574145826, "grad_norm": 1.081505029199479, "learning_rate": 2.2673628467964615e-06, "loss": 0.8009, "step": 5592 }, { "epoch": 0.7880239520958083, "grad_norm": 0.9240871027561208, "learning_rate": 2.2644699342385525e-06, "loss": 0.8518, "step": 5593 }, { "epoch": 0.7881648467770341, "grad_norm": 1.0104886418353267, "learning_rate": 2.261578632727209e-06, "loss": 0.8051, "step": 5594 }, { "epoch": 0.7883057414582599, "grad_norm": 0.9838994020450045, "learning_rate": 2.25868894286459e-06, "loss": 0.8678, "step": 5595 }, { "epoch": 0.7884466361394857, "grad_norm": 0.9100694741444973, "learning_rate": 2.2558008652525176e-06, "loss": 0.8462, "step": 5596 }, { "epoch": 0.7885875308207115, "grad_norm": 1.0988412497528728, "learning_rate": 2.252914400492476e-06, "loss": 0.4473, "step": 5597 }, { "epoch": 0.7887284255019373, "grad_norm": 1.0650487457302495, "learning_rate": 2.250029549185626e-06, "loss": 0.8103, "step": 5598 }, { "epoch": 0.7888693201831631, "grad_norm": 1.1544531770211535, "learning_rate": 2.2471463119327743e-06, "loss": 0.4401, "step": 5599 }, { "epoch": 0.7890102148643888, "grad_norm": 1.4773465239047954, "learning_rate": 2.244264689334409e-06, "loss": 0.4424, "step": 5600 }, { "epoch": 0.7891511095456146, "grad_norm": 0.9015284841996585, "learning_rate": 2.241384681990668e-06, "loss": 0.8518, "step": 5601 }, { "epoch": 0.7892920042268404, "grad_norm": 1.2509492437112355, "learning_rate": 2.238506290501362e-06, "loss": 0.5244, "step": 5602 }, { "epoch": 0.7894328989080662, "grad_norm": 1.003261933970346, "learning_rate": 2.2356295154659556e-06, "loss": 0.7765, "step": 5603 }, { "epoch": 0.789573793589292, "grad_norm": 1.2622623226810683, "learning_rate": 2.232754357483591e-06, "loss": 0.5806, "step": 5604 }, { "epoch": 0.7897146882705178, "grad_norm": 1.448098527370434, "learning_rate": 2.2298808171530574e-06, "loss": 0.5349, "step": 5605 }, { "epoch": 0.7898555829517435, "grad_norm": 1.0767715654451893, "learning_rate": 2.2270088950728243e-06, "loss": 0.8552, "step": 5606 }, { "epoch": 0.7899964776329693, "grad_norm": 1.187600804931127, "learning_rate": 2.224138591841004e-06, "loss": 0.4457, "step": 5607 }, { "epoch": 0.7901373723141951, "grad_norm": 1.011931468141413, "learning_rate": 2.22126990805539e-06, "loss": 0.8256, "step": 5608 }, { "epoch": 0.7902782669954209, "grad_norm": 1.0222570448533255, "learning_rate": 2.2184028443134276e-06, "loss": 0.8195, "step": 5609 }, { "epoch": 0.7904191616766467, "grad_norm": 0.935496995441179, "learning_rate": 2.215537401212232e-06, "loss": 0.8285, "step": 5610 }, { "epoch": 0.7905600563578725, "grad_norm": 0.9774990311162425, "learning_rate": 2.2126735793485752e-06, "loss": 0.8464, "step": 5611 }, { "epoch": 0.7907009510390983, "grad_norm": 1.2641327331599264, "learning_rate": 2.2098113793188925e-06, "loss": 0.4804, "step": 5612 }, { "epoch": 0.790841845720324, "grad_norm": 0.9828209485607278, "learning_rate": 2.206950801719281e-06, "loss": 0.8512, "step": 5613 }, { "epoch": 0.7909827404015498, "grad_norm": 0.9478006505842016, "learning_rate": 2.2040918471455054e-06, "loss": 0.7713, "step": 5614 }, { "epoch": 0.7911236350827756, "grad_norm": 1.200468380948719, "learning_rate": 2.2012345161929825e-06, "loss": 0.5434, "step": 5615 }, { "epoch": 0.7912645297640014, "grad_norm": 1.1818208796720262, "learning_rate": 2.198378809456806e-06, "loss": 0.7759, "step": 5616 }, { "epoch": 0.7914054244452272, "grad_norm": 1.0025701193629992, "learning_rate": 2.1955247275317114e-06, "loss": 0.808, "step": 5617 }, { "epoch": 0.791546319126453, "grad_norm": 0.9938276075503002, "learning_rate": 2.192672271012114e-06, "loss": 0.8141, "step": 5618 }, { "epoch": 0.7916872138076788, "grad_norm": 1.0890333086440074, "learning_rate": 2.189821440492077e-06, "loss": 0.8201, "step": 5619 }, { "epoch": 0.7918281084889045, "grad_norm": 0.9236679870457918, "learning_rate": 2.186972236565337e-06, "loss": 0.7986, "step": 5620 }, { "epoch": 0.7919690031701303, "grad_norm": 1.4109217669993237, "learning_rate": 2.184124659825283e-06, "loss": 0.455, "step": 5621 }, { "epoch": 0.7921098978513561, "grad_norm": 1.0229614277100254, "learning_rate": 2.181278710864968e-06, "loss": 0.3963, "step": 5622 }, { "epoch": 0.7922507925325819, "grad_norm": 1.021394788550945, "learning_rate": 2.1784343902771032e-06, "loss": 0.8081, "step": 5623 }, { "epoch": 0.7923916872138077, "grad_norm": 0.9355401288051727, "learning_rate": 2.175591698654068e-06, "loss": 0.8449, "step": 5624 }, { "epoch": 0.7925325818950335, "grad_norm": 1.0736195352811717, "learning_rate": 2.172750636587895e-06, "loss": 0.8514, "step": 5625 }, { "epoch": 0.7926734765762592, "grad_norm": 1.0824226733419704, "learning_rate": 2.1699112046702853e-06, "loss": 0.8444, "step": 5626 }, { "epoch": 0.792814371257485, "grad_norm": 1.032402817014748, "learning_rate": 2.1670734034925867e-06, "loss": 0.7895, "step": 5627 }, { "epoch": 0.7929552659387108, "grad_norm": 1.1048331060978467, "learning_rate": 2.1642372336458226e-06, "loss": 0.8798, "step": 5628 }, { "epoch": 0.7930961606199366, "grad_norm": 1.0210146340572597, "learning_rate": 2.1614026957206703e-06, "loss": 0.8094, "step": 5629 }, { "epoch": 0.7932370553011624, "grad_norm": 1.2938940405371895, "learning_rate": 2.1585697903074622e-06, "loss": 0.4998, "step": 5630 }, { "epoch": 0.7933779499823882, "grad_norm": 1.2181312931693522, "learning_rate": 2.155738517996202e-06, "loss": 0.4736, "step": 5631 }, { "epoch": 0.793518844663614, "grad_norm": 1.0753656814196122, "learning_rate": 2.152908879376544e-06, "loss": 0.8638, "step": 5632 }, { "epoch": 0.7936597393448397, "grad_norm": 1.0217856869373896, "learning_rate": 2.1500808750378053e-06, "loss": 0.8274, "step": 5633 }, { "epoch": 0.7938006340260655, "grad_norm": 1.0441266295358353, "learning_rate": 2.14725450556896e-06, "loss": 0.8623, "step": 5634 }, { "epoch": 0.7939415287072913, "grad_norm": 1.0504661426094035, "learning_rate": 2.1444297715586494e-06, "loss": 0.7496, "step": 5635 }, { "epoch": 0.7940824233885171, "grad_norm": 0.9923791584271046, "learning_rate": 2.1416066735951634e-06, "loss": 0.8135, "step": 5636 }, { "epoch": 0.7942233180697429, "grad_norm": 1.4021840154028884, "learning_rate": 2.1387852122664654e-06, "loss": 0.6771, "step": 5637 }, { "epoch": 0.7943642127509687, "grad_norm": 1.0063548353382963, "learning_rate": 2.1359653881601585e-06, "loss": 0.8473, "step": 5638 }, { "epoch": 0.7945051074321944, "grad_norm": 1.0301043292554397, "learning_rate": 2.1331472018635234e-06, "loss": 0.8682, "step": 5639 }, { "epoch": 0.7946460021134202, "grad_norm": 1.1255431932868154, "learning_rate": 2.1303306539634884e-06, "loss": 0.8586, "step": 5640 }, { "epoch": 0.794786896794646, "grad_norm": 1.0103903572917228, "learning_rate": 2.1275157450466467e-06, "loss": 0.8441, "step": 5641 }, { "epoch": 0.7949277914758718, "grad_norm": 1.03187169382902, "learning_rate": 2.1247024756992472e-06, "loss": 0.8601, "step": 5642 }, { "epoch": 0.7950686861570976, "grad_norm": 1.212234080913314, "learning_rate": 2.1218908465071966e-06, "loss": 0.5147, "step": 5643 }, { "epoch": 0.7952095808383234, "grad_norm": 1.3623432896311811, "learning_rate": 2.1190808580560583e-06, "loss": 0.7239, "step": 5644 }, { "epoch": 0.7953504755195492, "grad_norm": 1.1844727199574807, "learning_rate": 2.1162725109310645e-06, "loss": 0.4475, "step": 5645 }, { "epoch": 0.7954913702007749, "grad_norm": 1.2857224396018134, "learning_rate": 2.113465805717089e-06, "loss": 0.4272, "step": 5646 }, { "epoch": 0.7956322648820007, "grad_norm": 1.3156196015623767, "learning_rate": 2.1106607429986837e-06, "loss": 0.5449, "step": 5647 }, { "epoch": 0.7957731595632265, "grad_norm": 1.0950873121453644, "learning_rate": 2.1078573233600352e-06, "loss": 0.4617, "step": 5648 }, { "epoch": 0.7959140542444523, "grad_norm": 1.2707466401133762, "learning_rate": 2.1050555473850076e-06, "loss": 0.4825, "step": 5649 }, { "epoch": 0.7960549489256781, "grad_norm": 0.9662166472334233, "learning_rate": 2.1022554156571107e-06, "loss": 0.8731, "step": 5650 }, { "epoch": 0.7961958436069039, "grad_norm": 1.402871487770726, "learning_rate": 2.0994569287595213e-06, "loss": 0.4555, "step": 5651 }, { "epoch": 0.7963367382881297, "grad_norm": 1.220722258404636, "learning_rate": 2.0966600872750654e-06, "loss": 0.4761, "step": 5652 }, { "epoch": 0.7964776329693554, "grad_norm": 1.051709537997873, "learning_rate": 2.09386489178623e-06, "loss": 0.8004, "step": 5653 }, { "epoch": 0.7966185276505812, "grad_norm": 0.9885776514848149, "learning_rate": 2.091071342875157e-06, "loss": 0.8428, "step": 5654 }, { "epoch": 0.796759422331807, "grad_norm": 1.0352124881497644, "learning_rate": 2.0882794411236506e-06, "loss": 0.8666, "step": 5655 }, { "epoch": 0.7969003170130328, "grad_norm": 1.267085676770265, "learning_rate": 2.0854891871131653e-06, "loss": 0.4307, "step": 5656 }, { "epoch": 0.7970412116942586, "grad_norm": 1.4054845874525046, "learning_rate": 2.082700581424821e-06, "loss": 0.5349, "step": 5657 }, { "epoch": 0.7971821063754844, "grad_norm": 1.1207465453093166, "learning_rate": 2.079913624639381e-06, "loss": 0.8324, "step": 5658 }, { "epoch": 0.7973230010567101, "grad_norm": 0.9895915872809824, "learning_rate": 2.07712831733728e-06, "loss": 0.8065, "step": 5659 }, { "epoch": 0.7974638957379359, "grad_norm": 1.450695427434395, "learning_rate": 2.074344660098597e-06, "loss": 0.553, "step": 5660 }, { "epoch": 0.7976047904191617, "grad_norm": 1.1545149473941263, "learning_rate": 2.0715626535030776e-06, "loss": 0.5223, "step": 5661 }, { "epoch": 0.7977456851003875, "grad_norm": 1.2570615544350663, "learning_rate": 2.0687822981301165e-06, "loss": 0.5559, "step": 5662 }, { "epoch": 0.7978865797816133, "grad_norm": 1.3442480217207589, "learning_rate": 2.0660035945587676e-06, "loss": 0.4791, "step": 5663 }, { "epoch": 0.7980274744628391, "grad_norm": 1.0689691428717096, "learning_rate": 2.063226543367736e-06, "loss": 0.828, "step": 5664 }, { "epoch": 0.7981683691440649, "grad_norm": 1.032405792801568, "learning_rate": 2.0604511451353916e-06, "loss": 0.8295, "step": 5665 }, { "epoch": 0.7983092638252905, "grad_norm": 1.017874883434041, "learning_rate": 2.0576774004397494e-06, "loss": 0.7934, "step": 5666 }, { "epoch": 0.7984501585065163, "grad_norm": 1.1026056258718593, "learning_rate": 2.0549053098584926e-06, "loss": 0.8449, "step": 5667 }, { "epoch": 0.7985910531877422, "grad_norm": 1.2005841321540989, "learning_rate": 2.0521348739689506e-06, "loss": 0.4254, "step": 5668 }, { "epoch": 0.798731947868968, "grad_norm": 1.0337837556258402, "learning_rate": 2.0493660933481085e-06, "loss": 0.8226, "step": 5669 }, { "epoch": 0.7988728425501938, "grad_norm": 0.9650801909912412, "learning_rate": 2.0465989685726096e-06, "loss": 0.8217, "step": 5670 }, { "epoch": 0.7990137372314196, "grad_norm": 1.0322766835596353, "learning_rate": 2.043833500218748e-06, "loss": 0.8094, "step": 5671 }, { "epoch": 0.7991546319126454, "grad_norm": 0.9730312329012312, "learning_rate": 2.0410696888624837e-06, "loss": 0.7922, "step": 5672 }, { "epoch": 0.799295526593871, "grad_norm": 1.2909220923922724, "learning_rate": 2.038307535079419e-06, "loss": 0.4195, "step": 5673 }, { "epoch": 0.7994364212750968, "grad_norm": 1.3192567135714166, "learning_rate": 2.0355470394448184e-06, "loss": 0.5323, "step": 5674 }, { "epoch": 0.7995773159563226, "grad_norm": 1.2994528987937934, "learning_rate": 2.0327882025335944e-06, "loss": 0.5839, "step": 5675 }, { "epoch": 0.7997182106375484, "grad_norm": 1.0608035416814225, "learning_rate": 2.030031024920325e-06, "loss": 0.8031, "step": 5676 }, { "epoch": 0.7998591053187742, "grad_norm": 1.0199737918059193, "learning_rate": 2.0272755071792295e-06, "loss": 0.8787, "step": 5677 }, { "epoch": 0.8, "grad_norm": 1.2733087379892425, "learning_rate": 2.024521649884197e-06, "loss": 0.5135, "step": 5678 }, { "epoch": 0.8001408946812257, "grad_norm": 0.9806604458655758, "learning_rate": 2.0217694536087506e-06, "loss": 0.8954, "step": 5679 }, { "epoch": 0.8002817893624515, "grad_norm": 0.9244954290813208, "learning_rate": 2.0190189189260877e-06, "loss": 0.8173, "step": 5680 }, { "epoch": 0.8004226840436773, "grad_norm": 0.9969821514988562, "learning_rate": 2.0162700464090433e-06, "loss": 0.796, "step": 5681 }, { "epoch": 0.8005635787249031, "grad_norm": 0.9636317273027954, "learning_rate": 2.013522836630122e-06, "loss": 0.8344, "step": 5682 }, { "epoch": 0.8007044734061289, "grad_norm": 1.251239347011778, "learning_rate": 2.0107772901614685e-06, "loss": 0.4253, "step": 5683 }, { "epoch": 0.8008453680873547, "grad_norm": 1.059589215017148, "learning_rate": 2.0080334075748874e-06, "loss": 0.8453, "step": 5684 }, { "epoch": 0.8009862627685805, "grad_norm": 1.033490051376356, "learning_rate": 2.0052911894418338e-06, "loss": 0.8397, "step": 5685 }, { "epoch": 0.8011271574498062, "grad_norm": 1.1877014714320286, "learning_rate": 2.002550636333421e-06, "loss": 0.419, "step": 5686 }, { "epoch": 0.801268052131032, "grad_norm": 0.9301208858933667, "learning_rate": 1.9998117488204083e-06, "loss": 0.8475, "step": 5687 }, { "epoch": 0.8014089468122578, "grad_norm": 1.0189728459893779, "learning_rate": 1.9970745274732195e-06, "loss": 0.9255, "step": 5688 }, { "epoch": 0.8015498414934836, "grad_norm": 1.0636468462021655, "learning_rate": 1.9943389728619187e-06, "loss": 0.8482, "step": 5689 }, { "epoch": 0.8016907361747094, "grad_norm": 1.0693734857336457, "learning_rate": 1.9916050855562307e-06, "loss": 0.8835, "step": 5690 }, { "epoch": 0.8018316308559352, "grad_norm": 0.9915011908964805, "learning_rate": 1.9888728661255275e-06, "loss": 0.7989, "step": 5691 }, { "epoch": 0.8019725255371609, "grad_norm": 0.9836447363475286, "learning_rate": 1.9861423151388416e-06, "loss": 0.8242, "step": 5692 }, { "epoch": 0.8021134202183867, "grad_norm": 0.9653194263640569, "learning_rate": 1.9834134331648515e-06, "loss": 0.85, "step": 5693 }, { "epoch": 0.8022543148996125, "grad_norm": 1.2813200002650045, "learning_rate": 1.980686220771889e-06, "loss": 0.4981, "step": 5694 }, { "epoch": 0.8023952095808383, "grad_norm": 1.1628908958980504, "learning_rate": 1.9779606785279394e-06, "loss": 0.5085, "step": 5695 }, { "epoch": 0.8025361042620641, "grad_norm": 1.017101838651377, "learning_rate": 1.9752368070006424e-06, "loss": 0.8785, "step": 5696 }, { "epoch": 0.8026769989432899, "grad_norm": 0.9549670979226091, "learning_rate": 1.972514606757282e-06, "loss": 0.7931, "step": 5697 }, { "epoch": 0.8028178936245157, "grad_norm": 0.9898201227104847, "learning_rate": 1.9697940783648074e-06, "loss": 0.8873, "step": 5698 }, { "epoch": 0.8029587883057414, "grad_norm": 1.2249370648978926, "learning_rate": 1.967075222389807e-06, "loss": 0.5377, "step": 5699 }, { "epoch": 0.8030996829869672, "grad_norm": 1.2875585848373046, "learning_rate": 1.964358039398526e-06, "loss": 0.486, "step": 5700 }, { "epoch": 0.803240577668193, "grad_norm": 1.0459186467074666, "learning_rate": 1.9616425299568575e-06, "loss": 0.8051, "step": 5701 }, { "epoch": 0.8033814723494188, "grad_norm": 1.1493265436319504, "learning_rate": 1.9589286946303565e-06, "loss": 0.8769, "step": 5702 }, { "epoch": 0.8035223670306446, "grad_norm": 1.0414169797694144, "learning_rate": 1.9562165339842177e-06, "loss": 0.8534, "step": 5703 }, { "epoch": 0.8036632617118704, "grad_norm": 0.9650692239996717, "learning_rate": 1.9535060485832922e-06, "loss": 0.835, "step": 5704 }, { "epoch": 0.8038041563930962, "grad_norm": 0.9656468394215483, "learning_rate": 1.950797238992078e-06, "loss": 0.8407, "step": 5705 }, { "epoch": 0.8039450510743219, "grad_norm": 0.9639684167167093, "learning_rate": 1.948090105774735e-06, "loss": 0.8393, "step": 5706 }, { "epoch": 0.8040859457555477, "grad_norm": 0.9503265969955063, "learning_rate": 1.945384649495059e-06, "loss": 0.8291, "step": 5707 }, { "epoch": 0.8042268404367735, "grad_norm": 1.1909096797021792, "learning_rate": 1.94268087071651e-06, "loss": 0.4927, "step": 5708 }, { "epoch": 0.8043677351179993, "grad_norm": 1.0482811077738818, "learning_rate": 1.939978770002191e-06, "loss": 0.8204, "step": 5709 }, { "epoch": 0.8045086297992251, "grad_norm": 0.9949737359217039, "learning_rate": 1.937278347914856e-06, "loss": 0.7874, "step": 5710 }, { "epoch": 0.8046495244804509, "grad_norm": 1.2750473068851598, "learning_rate": 1.9345796050169086e-06, "loss": 0.4442, "step": 5711 }, { "epoch": 0.8047904191616766, "grad_norm": 0.967595391310865, "learning_rate": 1.93188254187041e-06, "loss": 0.8582, "step": 5712 }, { "epoch": 0.8049313138429024, "grad_norm": 1.4853653022171625, "learning_rate": 1.929187159037064e-06, "loss": 0.5424, "step": 5713 }, { "epoch": 0.8050722085241282, "grad_norm": 1.1028149574480839, "learning_rate": 1.9264934570782256e-06, "loss": 0.7783, "step": 5714 }, { "epoch": 0.805213103205354, "grad_norm": 0.9654714222571513, "learning_rate": 1.9238014365549017e-06, "loss": 0.7909, "step": 5715 }, { "epoch": 0.8053539978865798, "grad_norm": 1.0933114982006924, "learning_rate": 1.9211110980277458e-06, "loss": 0.8874, "step": 5716 }, { "epoch": 0.8054948925678056, "grad_norm": 1.214887621767082, "learning_rate": 1.9184224420570684e-06, "loss": 0.4794, "step": 5717 }, { "epoch": 0.8056357872490314, "grad_norm": 0.9549923552638839, "learning_rate": 1.9157354692028194e-06, "loss": 0.8204, "step": 5718 }, { "epoch": 0.8057766819302571, "grad_norm": 0.9479297199134745, "learning_rate": 1.913050180024608e-06, "loss": 0.7997, "step": 5719 }, { "epoch": 0.8059175766114829, "grad_norm": 1.0311863510662418, "learning_rate": 1.910366575081687e-06, "loss": 0.8338, "step": 5720 }, { "epoch": 0.8060584712927087, "grad_norm": 1.2154065584287885, "learning_rate": 1.9076846549329585e-06, "loss": 0.4878, "step": 5721 }, { "epoch": 0.8061993659739345, "grad_norm": 1.0357589875004067, "learning_rate": 1.9050044201369721e-06, "loss": 0.8424, "step": 5722 }, { "epoch": 0.8063402606551603, "grad_norm": 1.1576375910137782, "learning_rate": 1.9023258712519355e-06, "loss": 0.855, "step": 5723 }, { "epoch": 0.8064811553363861, "grad_norm": 1.508454674366303, "learning_rate": 1.8996490088356945e-06, "loss": 0.4721, "step": 5724 }, { "epoch": 0.8066220500176118, "grad_norm": 0.9547339710228654, "learning_rate": 1.8969738334457499e-06, "loss": 0.8559, "step": 5725 }, { "epoch": 0.8067629446988376, "grad_norm": 1.2939183887947296, "learning_rate": 1.894300345639245e-06, "loss": 0.4034, "step": 5726 }, { "epoch": 0.8069038393800634, "grad_norm": 1.0300092145776891, "learning_rate": 1.8916285459729833e-06, "loss": 0.7787, "step": 5727 }, { "epoch": 0.8070447340612892, "grad_norm": 1.0026744606046998, "learning_rate": 1.8889584350034018e-06, "loss": 0.7845, "step": 5728 }, { "epoch": 0.807185628742515, "grad_norm": 1.0523899315703078, "learning_rate": 1.8862900132866003e-06, "loss": 0.8354, "step": 5729 }, { "epoch": 0.8073265234237408, "grad_norm": 1.0809167835194418, "learning_rate": 1.8836232813783162e-06, "loss": 0.8881, "step": 5730 }, { "epoch": 0.8074674181049666, "grad_norm": 1.0903942651969085, "learning_rate": 1.8809582398339388e-06, "loss": 0.8311, "step": 5731 }, { "epoch": 0.8076083127861923, "grad_norm": 1.1229115803168104, "learning_rate": 1.878294889208504e-06, "loss": 0.8151, "step": 5732 }, { "epoch": 0.8077492074674181, "grad_norm": 1.5191059583645747, "learning_rate": 1.8756332300566993e-06, "loss": 0.4972, "step": 5733 }, { "epoch": 0.8078901021486439, "grad_norm": 1.3335400192625704, "learning_rate": 1.8729732629328578e-06, "loss": 0.5686, "step": 5734 }, { "epoch": 0.8080309968298697, "grad_norm": 0.9169929986083087, "learning_rate": 1.8703149883909566e-06, "loss": 0.7568, "step": 5735 }, { "epoch": 0.8081718915110955, "grad_norm": 0.9924610566995617, "learning_rate": 1.8676584069846237e-06, "loss": 0.8687, "step": 5736 }, { "epoch": 0.8083127861923213, "grad_norm": 1.2603846396272353, "learning_rate": 1.8650035192671378e-06, "loss": 0.4232, "step": 5737 }, { "epoch": 0.8084536808735471, "grad_norm": 1.0082805670199533, "learning_rate": 1.8623503257914156e-06, "loss": 0.857, "step": 5738 }, { "epoch": 0.8085945755547728, "grad_norm": 0.9349842939422747, "learning_rate": 1.8596988271100336e-06, "loss": 0.8885, "step": 5739 }, { "epoch": 0.8087354702359986, "grad_norm": 1.1139221352790445, "learning_rate": 1.8570490237752037e-06, "loss": 0.4927, "step": 5740 }, { "epoch": 0.8088763649172244, "grad_norm": 1.217248699294217, "learning_rate": 1.854400916338791e-06, "loss": 0.4402, "step": 5741 }, { "epoch": 0.8090172595984502, "grad_norm": 1.0419680839490988, "learning_rate": 1.8517545053523035e-06, "loss": 0.8214, "step": 5742 }, { "epoch": 0.809158154279676, "grad_norm": 1.0756289796089313, "learning_rate": 1.8491097913669021e-06, "loss": 0.8779, "step": 5743 }, { "epoch": 0.8092990489609018, "grad_norm": 0.9427133636953922, "learning_rate": 1.8464667749333875e-06, "loss": 0.8159, "step": 5744 }, { "epoch": 0.8094399436421275, "grad_norm": 1.2373723524094746, "learning_rate": 1.8438254566022106e-06, "loss": 0.8018, "step": 5745 }, { "epoch": 0.8095808383233533, "grad_norm": 1.2359678241093586, "learning_rate": 1.841185836923466e-06, "loss": 0.4198, "step": 5746 }, { "epoch": 0.8097217330045791, "grad_norm": 1.0745972072199292, "learning_rate": 1.8385479164468988e-06, "loss": 0.8396, "step": 5747 }, { "epoch": 0.8098626276858049, "grad_norm": 1.051725072332493, "learning_rate": 1.835911695721896e-06, "loss": 0.8663, "step": 5748 }, { "epoch": 0.8100035223670307, "grad_norm": 1.3195014128307778, "learning_rate": 1.8332771752974943e-06, "loss": 0.5172, "step": 5749 }, { "epoch": 0.8101444170482565, "grad_norm": 1.0166805562400265, "learning_rate": 1.830644355722373e-06, "loss": 0.8161, "step": 5750 }, { "epoch": 0.8102853117294823, "grad_norm": 0.9790426909079061, "learning_rate": 1.828013237544858e-06, "loss": 0.8225, "step": 5751 }, { "epoch": 0.810426206410708, "grad_norm": 1.0472712490804204, "learning_rate": 1.8253838213129204e-06, "loss": 0.8501, "step": 5752 }, { "epoch": 0.8105671010919338, "grad_norm": 1.0142702113605244, "learning_rate": 1.8227561075741806e-06, "loss": 0.7928, "step": 5753 }, { "epoch": 0.8107079957731596, "grad_norm": 0.8630697407808561, "learning_rate": 1.8201300968759005e-06, "loss": 0.7733, "step": 5754 }, { "epoch": 0.8108488904543854, "grad_norm": 0.9435603997371487, "learning_rate": 1.8175057897649873e-06, "loss": 0.7895, "step": 5755 }, { "epoch": 0.8109897851356112, "grad_norm": 0.9757855044485553, "learning_rate": 1.8148831867879958e-06, "loss": 0.8032, "step": 5756 }, { "epoch": 0.811130679816837, "grad_norm": 1.0390001403348628, "learning_rate": 1.8122622884911211e-06, "loss": 0.7703, "step": 5757 }, { "epoch": 0.8112715744980628, "grad_norm": 1.2541901844359709, "learning_rate": 1.8096430954202115e-06, "loss": 0.5422, "step": 5758 }, { "epoch": 0.8114124691792884, "grad_norm": 1.4774014625254621, "learning_rate": 1.8070256081207515e-06, "loss": 0.4886, "step": 5759 }, { "epoch": 0.8115533638605142, "grad_norm": 1.1701004213481816, "learning_rate": 1.8044098271378796e-06, "loss": 0.4404, "step": 5760 }, { "epoch": 0.81169425854174, "grad_norm": 1.2896021208817245, "learning_rate": 1.8017957530163687e-06, "loss": 0.5283, "step": 5761 }, { "epoch": 0.8118351532229658, "grad_norm": 1.1780211315059685, "learning_rate": 1.7991833863006437e-06, "loss": 0.5288, "step": 5762 }, { "epoch": 0.8119760479041916, "grad_norm": 0.9628745066444606, "learning_rate": 1.7965727275347666e-06, "loss": 0.8578, "step": 5763 }, { "epoch": 0.8121169425854174, "grad_norm": 1.1484666398331995, "learning_rate": 1.7939637772624552e-06, "loss": 0.8649, "step": 5764 }, { "epoch": 0.8122578372666431, "grad_norm": 0.9362493164846523, "learning_rate": 1.7913565360270612e-06, "loss": 0.8565, "step": 5765 }, { "epoch": 0.8123987319478689, "grad_norm": 1.0246481084552226, "learning_rate": 1.788751004371585e-06, "loss": 0.8519, "step": 5766 }, { "epoch": 0.8125396266290947, "grad_norm": 0.9089157829341094, "learning_rate": 1.7861471828386655e-06, "loss": 0.8192, "step": 5767 }, { "epoch": 0.8126805213103205, "grad_norm": 1.06409773021068, "learning_rate": 1.783545071970596e-06, "loss": 0.8654, "step": 5768 }, { "epoch": 0.8128214159915463, "grad_norm": 1.167600182240979, "learning_rate": 1.7809446723093015e-06, "loss": 0.447, "step": 5769 }, { "epoch": 0.8129623106727721, "grad_norm": 1.2895720752565436, "learning_rate": 1.7783459843963624e-06, "loss": 0.4653, "step": 5770 }, { "epoch": 0.8131032053539979, "grad_norm": 1.2011628871014965, "learning_rate": 1.7757490087729945e-06, "loss": 0.8326, "step": 5771 }, { "epoch": 0.8132441000352236, "grad_norm": 1.1080059645697944, "learning_rate": 1.7731537459800563e-06, "loss": 0.8954, "step": 5772 }, { "epoch": 0.8133849947164494, "grad_norm": 1.2978037686626456, "learning_rate": 1.7705601965580533e-06, "loss": 0.5586, "step": 5773 }, { "epoch": 0.8135258893976752, "grad_norm": 1.1632925279652477, "learning_rate": 1.7679683610471354e-06, "loss": 0.7948, "step": 5774 }, { "epoch": 0.813666784078901, "grad_norm": 1.0145039091637094, "learning_rate": 1.76537823998709e-06, "loss": 0.8704, "step": 5775 }, { "epoch": 0.8138076787601268, "grad_norm": 1.2023429731186428, "learning_rate": 1.7627898339173576e-06, "loss": 0.4293, "step": 5776 }, { "epoch": 0.8139485734413526, "grad_norm": 1.1366992553938302, "learning_rate": 1.760203143377005e-06, "loss": 0.8941, "step": 5777 }, { "epoch": 0.8140894681225783, "grad_norm": 0.9935214213727184, "learning_rate": 1.7576181689047588e-06, "loss": 0.8641, "step": 5778 }, { "epoch": 0.8142303628038041, "grad_norm": 1.1735860434807468, "learning_rate": 1.7550349110389765e-06, "loss": 0.867, "step": 5779 }, { "epoch": 0.8143712574850299, "grad_norm": 1.2722854760673168, "learning_rate": 1.7524533703176672e-06, "loss": 0.5246, "step": 5780 }, { "epoch": 0.8145121521662557, "grad_norm": 1.1652698278660036, "learning_rate": 1.7498735472784733e-06, "loss": 0.4214, "step": 5781 }, { "epoch": 0.8146530468474815, "grad_norm": 1.1282268359997716, "learning_rate": 1.7472954424586874e-06, "loss": 0.8821, "step": 5782 }, { "epoch": 0.8147939415287073, "grad_norm": 1.037025293349304, "learning_rate": 1.7447190563952343e-06, "loss": 0.7785, "step": 5783 }, { "epoch": 0.8149348362099331, "grad_norm": 1.0590406150126517, "learning_rate": 1.7421443896246948e-06, "loss": 0.856, "step": 5784 }, { "epoch": 0.8150757308911588, "grad_norm": 0.9528492064647842, "learning_rate": 1.7395714426832778e-06, "loss": 0.8078, "step": 5785 }, { "epoch": 0.8152166255723846, "grad_norm": 1.152503062222104, "learning_rate": 1.7370002161068477e-06, "loss": 0.8234, "step": 5786 }, { "epoch": 0.8153575202536104, "grad_norm": 0.9785783914762427, "learning_rate": 1.7344307104308944e-06, "loss": 0.8402, "step": 5787 }, { "epoch": 0.8154984149348362, "grad_norm": 0.9722147853769992, "learning_rate": 1.7318629261905639e-06, "loss": 0.8616, "step": 5788 }, { "epoch": 0.815639309616062, "grad_norm": 1.3009642635004075, "learning_rate": 1.7292968639206342e-06, "loss": 0.5179, "step": 5789 }, { "epoch": 0.8157802042972878, "grad_norm": 1.029695940565176, "learning_rate": 1.7267325241555321e-06, "loss": 0.87, "step": 5790 }, { "epoch": 0.8159210989785136, "grad_norm": 1.1928137920899213, "learning_rate": 1.7241699074293205e-06, "loss": 0.4799, "step": 5791 }, { "epoch": 0.8160619936597393, "grad_norm": 0.8840146468925532, "learning_rate": 1.7216090142757046e-06, "loss": 0.7887, "step": 5792 }, { "epoch": 0.8162028883409651, "grad_norm": 1.0039956765216531, "learning_rate": 1.7190498452280292e-06, "loss": 0.8722, "step": 5793 }, { "epoch": 0.8163437830221909, "grad_norm": 0.9152161406546961, "learning_rate": 1.7164924008192852e-06, "loss": 0.7843, "step": 5794 }, { "epoch": 0.8164846777034167, "grad_norm": 1.0933882382177205, "learning_rate": 1.713936681582097e-06, "loss": 0.8936, "step": 5795 }, { "epoch": 0.8166255723846425, "grad_norm": 0.96305485016593, "learning_rate": 1.7113826880487405e-06, "loss": 0.8264, "step": 5796 }, { "epoch": 0.8167664670658683, "grad_norm": 1.1037965197106272, "learning_rate": 1.7088304207511176e-06, "loss": 0.8276, "step": 5797 }, { "epoch": 0.816907361747094, "grad_norm": 1.182548336190464, "learning_rate": 1.706279880220779e-06, "loss": 0.4565, "step": 5798 }, { "epoch": 0.8170482564283198, "grad_norm": 1.2204548032432785, "learning_rate": 1.7037310669889206e-06, "loss": 0.4339, "step": 5799 }, { "epoch": 0.8171891511095456, "grad_norm": 1.0412876277897876, "learning_rate": 1.7011839815863674e-06, "loss": 0.9158, "step": 5800 }, { "epoch": 0.8173300457907714, "grad_norm": 0.9320431071631283, "learning_rate": 1.6986386245435949e-06, "loss": 0.8006, "step": 5801 }, { "epoch": 0.8174709404719972, "grad_norm": 1.1104167972755137, "learning_rate": 1.6960949963907113e-06, "loss": 0.437, "step": 5802 }, { "epoch": 0.817611835153223, "grad_norm": 1.020348406512084, "learning_rate": 1.6935530976574689e-06, "loss": 0.8112, "step": 5803 }, { "epoch": 0.8177527298344488, "grad_norm": 1.1082594286742058, "learning_rate": 1.6910129288732536e-06, "loss": 0.8471, "step": 5804 }, { "epoch": 0.8178936245156745, "grad_norm": 0.9657505400094384, "learning_rate": 1.6884744905671036e-06, "loss": 0.8217, "step": 5805 }, { "epoch": 0.8180345191969003, "grad_norm": 0.9759458274495028, "learning_rate": 1.6859377832676814e-06, "loss": 0.8546, "step": 5806 }, { "epoch": 0.8181754138781261, "grad_norm": 1.0550634821245322, "learning_rate": 1.6834028075033048e-06, "loss": 0.8368, "step": 5807 }, { "epoch": 0.8183163085593519, "grad_norm": 1.3236518973038867, "learning_rate": 1.6808695638019124e-06, "loss": 0.4538, "step": 5808 }, { "epoch": 0.8184572032405777, "grad_norm": 0.9459334139650043, "learning_rate": 1.6783380526911008e-06, "loss": 0.826, "step": 5809 }, { "epoch": 0.8185980979218035, "grad_norm": 0.9670898856156652, "learning_rate": 1.6758082746980907e-06, "loss": 0.884, "step": 5810 }, { "epoch": 0.8187389926030292, "grad_norm": 1.0518060569725065, "learning_rate": 1.6732802303497554e-06, "loss": 0.8206, "step": 5811 }, { "epoch": 0.818879887284255, "grad_norm": 0.9757069458526068, "learning_rate": 1.6707539201725953e-06, "loss": 0.8176, "step": 5812 }, { "epoch": 0.8190207819654808, "grad_norm": 1.0708541406046554, "learning_rate": 1.6682293446927566e-06, "loss": 0.8488, "step": 5813 }, { "epoch": 0.8191616766467066, "grad_norm": 1.0034947482252417, "learning_rate": 1.6657065044360188e-06, "loss": 0.7574, "step": 5814 }, { "epoch": 0.8193025713279324, "grad_norm": 1.2297906229919024, "learning_rate": 1.663185399927807e-06, "loss": 0.5664, "step": 5815 }, { "epoch": 0.8194434660091582, "grad_norm": 0.9551490297490567, "learning_rate": 1.6606660316931778e-06, "loss": 0.8513, "step": 5816 }, { "epoch": 0.819584360690384, "grad_norm": 1.1966050734116807, "learning_rate": 1.658148400256836e-06, "loss": 0.4375, "step": 5817 }, { "epoch": 0.8197252553716097, "grad_norm": 1.2777192833227802, "learning_rate": 1.65563250614311e-06, "loss": 0.4935, "step": 5818 }, { "epoch": 0.8198661500528355, "grad_norm": 1.1364167412436628, "learning_rate": 1.6531183498759795e-06, "loss": 0.4483, "step": 5819 }, { "epoch": 0.8200070447340613, "grad_norm": 1.0224497734837419, "learning_rate": 1.6506059319790546e-06, "loss": 0.8695, "step": 5820 }, { "epoch": 0.8201479394152871, "grad_norm": 1.066874768438052, "learning_rate": 1.648095252975589e-06, "loss": 0.8295, "step": 5821 }, { "epoch": 0.8202888340965129, "grad_norm": 0.9823390745575802, "learning_rate": 1.6455863133884709e-06, "loss": 0.7736, "step": 5822 }, { "epoch": 0.8204297287777387, "grad_norm": 1.0762118243982928, "learning_rate": 1.6430791137402246e-06, "loss": 0.8687, "step": 5823 }, { "epoch": 0.8205706234589645, "grad_norm": 1.2515568428035666, "learning_rate": 1.6405736545530127e-06, "loss": 0.4416, "step": 5824 }, { "epoch": 0.8207115181401902, "grad_norm": 1.096066709288038, "learning_rate": 1.638069936348643e-06, "loss": 0.4945, "step": 5825 }, { "epoch": 0.820852412821416, "grad_norm": 0.9110323963405114, "learning_rate": 1.6355679596485475e-06, "loss": 0.7923, "step": 5826 }, { "epoch": 0.8209933075026418, "grad_norm": 1.151415243928389, "learning_rate": 1.633067724973808e-06, "loss": 0.8289, "step": 5827 }, { "epoch": 0.8211342021838676, "grad_norm": 1.2659212444621875, "learning_rate": 1.630569232845135e-06, "loss": 0.4223, "step": 5828 }, { "epoch": 0.8212750968650934, "grad_norm": 1.098813754769966, "learning_rate": 1.6280724837828799e-06, "loss": 0.4662, "step": 5829 }, { "epoch": 0.8214159915463192, "grad_norm": 0.9534562211079294, "learning_rate": 1.6255774783070276e-06, "loss": 0.8617, "step": 5830 }, { "epoch": 0.8215568862275449, "grad_norm": 1.180485394186736, "learning_rate": 1.6230842169372073e-06, "loss": 0.8215, "step": 5831 }, { "epoch": 0.8216977809087707, "grad_norm": 1.104746337152171, "learning_rate": 1.620592700192678e-06, "loss": 0.8959, "step": 5832 }, { "epoch": 0.8218386755899965, "grad_norm": 1.0123521614314852, "learning_rate": 1.6181029285923365e-06, "loss": 0.7826, "step": 5833 }, { "epoch": 0.8219795702712223, "grad_norm": 0.9146349250319658, "learning_rate": 1.6156149026547153e-06, "loss": 0.8305, "step": 5834 }, { "epoch": 0.8221204649524481, "grad_norm": 0.8818596054316977, "learning_rate": 1.6131286228979904e-06, "loss": 0.7129, "step": 5835 }, { "epoch": 0.8222613596336739, "grad_norm": 1.3664018594787664, "learning_rate": 1.6106440898399634e-06, "loss": 0.5346, "step": 5836 }, { "epoch": 0.8224022543148997, "grad_norm": 1.0084486081549309, "learning_rate": 1.6081613039980825e-06, "loss": 0.8433, "step": 5837 }, { "epoch": 0.8225431489961254, "grad_norm": 0.9936298074898475, "learning_rate": 1.6056802658894278e-06, "loss": 0.7972, "step": 5838 }, { "epoch": 0.8226840436773512, "grad_norm": 1.211751757114502, "learning_rate": 1.6032009760307065e-06, "loss": 0.4715, "step": 5839 }, { "epoch": 0.822824938358577, "grad_norm": 0.9908315324528733, "learning_rate": 1.6007234349382784e-06, "loss": 0.7698, "step": 5840 }, { "epoch": 0.8229658330398028, "grad_norm": 1.226418375261991, "learning_rate": 1.5982476431281257e-06, "loss": 0.4387, "step": 5841 }, { "epoch": 0.8231067277210286, "grad_norm": 1.068240019738478, "learning_rate": 1.5957736011158753e-06, "loss": 0.8262, "step": 5842 }, { "epoch": 0.8232476224022544, "grad_norm": 0.9235857561872759, "learning_rate": 1.5933013094167826e-06, "loss": 0.8128, "step": 5843 }, { "epoch": 0.8233885170834802, "grad_norm": 1.2347718852344987, "learning_rate": 1.5908307685457436e-06, "loss": 0.5581, "step": 5844 }, { "epoch": 0.8235294117647058, "grad_norm": 1.0626128173922191, "learning_rate": 1.5883619790172832e-06, "loss": 0.8086, "step": 5845 }, { "epoch": 0.8236703064459316, "grad_norm": 1.1618199326588539, "learning_rate": 1.5858949413455715e-06, "loss": 0.8632, "step": 5846 }, { "epoch": 0.8238112011271574, "grad_norm": 1.3180591824957781, "learning_rate": 1.583429656044403e-06, "loss": 0.5435, "step": 5847 }, { "epoch": 0.8239520958083832, "grad_norm": 0.9928419187450678, "learning_rate": 1.58096612362722e-06, "loss": 0.7707, "step": 5848 }, { "epoch": 0.824092990489609, "grad_norm": 1.041299706400331, "learning_rate": 1.5785043446070824e-06, "loss": 0.7984, "step": 5849 }, { "epoch": 0.8242338851708348, "grad_norm": 1.2423231965245791, "learning_rate": 1.5760443194967011e-06, "loss": 0.6322, "step": 5850 }, { "epoch": 0.8243747798520605, "grad_norm": 0.9432093163916693, "learning_rate": 1.5735860488084121e-06, "loss": 0.837, "step": 5851 }, { "epoch": 0.8245156745332863, "grad_norm": 1.2699466485522906, "learning_rate": 1.571129533054192e-06, "loss": 0.8454, "step": 5852 }, { "epoch": 0.8246565692145121, "grad_norm": 1.1870985767516686, "learning_rate": 1.5686747727456487e-06, "loss": 0.8819, "step": 5853 }, { "epoch": 0.8247974638957379, "grad_norm": 0.905532708500094, "learning_rate": 1.566221768394023e-06, "loss": 0.8696, "step": 5854 }, { "epoch": 0.8249383585769637, "grad_norm": 1.3390790467683702, "learning_rate": 1.5637705205101906e-06, "loss": 0.5318, "step": 5855 }, { "epoch": 0.8250792532581895, "grad_norm": 1.1990872788733236, "learning_rate": 1.5613210296046677e-06, "loss": 0.462, "step": 5856 }, { "epoch": 0.8252201479394153, "grad_norm": 1.2717254908356268, "learning_rate": 1.558873296187594e-06, "loss": 0.4895, "step": 5857 }, { "epoch": 0.825361042620641, "grad_norm": 0.9777277363490727, "learning_rate": 1.5564273207687542e-06, "loss": 0.9171, "step": 5858 }, { "epoch": 0.8255019373018668, "grad_norm": 1.0449758840807757, "learning_rate": 1.553983103857558e-06, "loss": 0.775, "step": 5859 }, { "epoch": 0.8256428319830926, "grad_norm": 1.3122560092718005, "learning_rate": 1.5515406459630522e-06, "loss": 0.5135, "step": 5860 }, { "epoch": 0.8257837266643184, "grad_norm": 1.0829119857097236, "learning_rate": 1.5490999475939173e-06, "loss": 0.8278, "step": 5861 }, { "epoch": 0.8259246213455442, "grad_norm": 1.2015701864350463, "learning_rate": 1.5466610092584688e-06, "loss": 0.8185, "step": 5862 }, { "epoch": 0.82606551602677, "grad_norm": 1.2452166232631732, "learning_rate": 1.544223831464654e-06, "loss": 0.565, "step": 5863 }, { "epoch": 0.8262064107079957, "grad_norm": 1.2225558925555389, "learning_rate": 1.5417884147200535e-06, "loss": 0.8561, "step": 5864 }, { "epoch": 0.8263473053892215, "grad_norm": 1.253733031233476, "learning_rate": 1.5393547595318782e-06, "loss": 0.6218, "step": 5865 }, { "epoch": 0.8264882000704473, "grad_norm": 1.1135005873103776, "learning_rate": 1.5369228664069803e-06, "loss": 0.4193, "step": 5866 }, { "epoch": 0.8266290947516731, "grad_norm": 1.016133455124949, "learning_rate": 1.5344927358518357e-06, "loss": 0.8658, "step": 5867 }, { "epoch": 0.8267699894328989, "grad_norm": 0.9953689366230519, "learning_rate": 1.5320643683725632e-06, "loss": 0.7954, "step": 5868 }, { "epoch": 0.8269108841141247, "grad_norm": 1.717768276563676, "learning_rate": 1.5296377644749038e-06, "loss": 0.542, "step": 5869 }, { "epoch": 0.8270517787953505, "grad_norm": 1.1020074043705863, "learning_rate": 1.5272129246642386e-06, "loss": 0.809, "step": 5870 }, { "epoch": 0.8271926734765762, "grad_norm": 1.234716495247785, "learning_rate": 1.5247898494455748e-06, "loss": 0.4909, "step": 5871 }, { "epoch": 0.827333568157802, "grad_norm": 1.1955274849882818, "learning_rate": 1.5223685393235631e-06, "loss": 0.4373, "step": 5872 }, { "epoch": 0.8274744628390278, "grad_norm": 0.9719864122577407, "learning_rate": 1.5199489948024749e-06, "loss": 0.8268, "step": 5873 }, { "epoch": 0.8276153575202536, "grad_norm": 1.3414558186912988, "learning_rate": 1.5175312163862189e-06, "loss": 0.4433, "step": 5874 }, { "epoch": 0.8277562522014794, "grad_norm": 1.0366936672424096, "learning_rate": 1.5151152045783346e-06, "loss": 0.8187, "step": 5875 }, { "epoch": 0.8278971468827052, "grad_norm": 1.0222739760022383, "learning_rate": 1.5127009598819997e-06, "loss": 0.7875, "step": 5876 }, { "epoch": 0.828038041563931, "grad_norm": 1.3236123866333862, "learning_rate": 1.510288482800012e-06, "loss": 0.4749, "step": 5877 }, { "epoch": 0.8281789362451567, "grad_norm": 1.213581693815818, "learning_rate": 1.5078777738348149e-06, "loss": 0.4606, "step": 5878 }, { "epoch": 0.8283198309263825, "grad_norm": 0.9961973401712798, "learning_rate": 1.5054688334884737e-06, "loss": 0.8102, "step": 5879 }, { "epoch": 0.8284607256076083, "grad_norm": 1.1938708380750311, "learning_rate": 1.503061662262687e-06, "loss": 0.5312, "step": 5880 }, { "epoch": 0.8286016202888341, "grad_norm": 0.9819174105661259, "learning_rate": 1.5006562606587882e-06, "loss": 0.808, "step": 5881 }, { "epoch": 0.8287425149700599, "grad_norm": 1.027171163351711, "learning_rate": 1.4982526291777377e-06, "loss": 0.7768, "step": 5882 }, { "epoch": 0.8288834096512857, "grad_norm": 1.0951757607930137, "learning_rate": 1.495850768320134e-06, "loss": 0.9186, "step": 5883 }, { "epoch": 0.8290243043325114, "grad_norm": 0.9616123771268068, "learning_rate": 1.493450678586199e-06, "loss": 0.872, "step": 5884 }, { "epoch": 0.8291651990137372, "grad_norm": 1.1712091474053028, "learning_rate": 1.491052360475792e-06, "loss": 0.499, "step": 5885 }, { "epoch": 0.829306093694963, "grad_norm": 1.1290403656211823, "learning_rate": 1.4886558144883957e-06, "loss": 0.4371, "step": 5886 }, { "epoch": 0.8294469883761888, "grad_norm": 1.3613340524918596, "learning_rate": 1.486261041123136e-06, "loss": 0.6066, "step": 5887 }, { "epoch": 0.8295878830574146, "grad_norm": 0.9722712357733172, "learning_rate": 1.4838680408787554e-06, "loss": 0.8298, "step": 5888 }, { "epoch": 0.8297287777386404, "grad_norm": 1.1391674440204198, "learning_rate": 1.4814768142536394e-06, "loss": 0.8519, "step": 5889 }, { "epoch": 0.8298696724198662, "grad_norm": 0.9554120441523661, "learning_rate": 1.4790873617457978e-06, "loss": 0.7999, "step": 5890 }, { "epoch": 0.8300105671010919, "grad_norm": 1.2197265854149886, "learning_rate": 1.4766996838528703e-06, "loss": 0.8268, "step": 5891 }, { "epoch": 0.8301514617823177, "grad_norm": 1.2402844299914202, "learning_rate": 1.4743137810721275e-06, "loss": 0.4355, "step": 5892 }, { "epoch": 0.8302923564635435, "grad_norm": 1.2810699078826082, "learning_rate": 1.4719296539004745e-06, "loss": 0.5636, "step": 5893 }, { "epoch": 0.8304332511447693, "grad_norm": 0.9917503567705427, "learning_rate": 1.4695473028344432e-06, "loss": 0.8907, "step": 5894 }, { "epoch": 0.8305741458259951, "grad_norm": 1.2881037507992474, "learning_rate": 1.467166728370194e-06, "loss": 0.4482, "step": 5895 }, { "epoch": 0.8307150405072209, "grad_norm": 1.1709010630219296, "learning_rate": 1.4647879310035185e-06, "loss": 0.8532, "step": 5896 }, { "epoch": 0.8308559351884466, "grad_norm": 1.0150286364684236, "learning_rate": 1.4624109112298412e-06, "loss": 0.8453, "step": 5897 }, { "epoch": 0.8309968298696724, "grad_norm": 1.260310210659352, "learning_rate": 1.460035669544213e-06, "loss": 0.6051, "step": 5898 }, { "epoch": 0.8311377245508982, "grad_norm": 0.9512350055345868, "learning_rate": 1.457662206441317e-06, "loss": 0.8622, "step": 5899 }, { "epoch": 0.831278619232124, "grad_norm": 1.2515848472165836, "learning_rate": 1.4552905224154635e-06, "loss": 0.8094, "step": 5900 }, { "epoch": 0.8314195139133498, "grad_norm": 1.3066781778644614, "learning_rate": 1.4529206179605936e-06, "loss": 0.5203, "step": 5901 }, { "epoch": 0.8315604085945756, "grad_norm": 0.9579455784227605, "learning_rate": 1.4505524935702741e-06, "loss": 0.8487, "step": 5902 }, { "epoch": 0.8317013032758014, "grad_norm": 1.109657011294502, "learning_rate": 1.4481861497377092e-06, "loss": 0.8382, "step": 5903 }, { "epoch": 0.8318421979570271, "grad_norm": 1.254853086793654, "learning_rate": 1.4458215869557256e-06, "loss": 0.4613, "step": 5904 }, { "epoch": 0.8319830926382529, "grad_norm": 0.9876493476494941, "learning_rate": 1.443458805716782e-06, "loss": 0.8286, "step": 5905 }, { "epoch": 0.8321239873194787, "grad_norm": 0.968172717895505, "learning_rate": 1.44109780651296e-06, "loss": 0.7595, "step": 5906 }, { "epoch": 0.8322648820007045, "grad_norm": 1.2828097572123929, "learning_rate": 1.4387385898359817e-06, "loss": 0.5482, "step": 5907 }, { "epoch": 0.8324057766819303, "grad_norm": 1.3933881294079205, "learning_rate": 1.4363811561771857e-06, "loss": 0.5418, "step": 5908 }, { "epoch": 0.8325466713631561, "grad_norm": 1.0585168895639654, "learning_rate": 1.434025506027551e-06, "loss": 0.8851, "step": 5909 }, { "epoch": 0.8326875660443819, "grad_norm": 1.1641817462650725, "learning_rate": 1.4316716398776743e-06, "loss": 0.8291, "step": 5910 }, { "epoch": 0.8328284607256076, "grad_norm": 0.973463113757858, "learning_rate": 1.4293195582177877e-06, "loss": 0.8366, "step": 5911 }, { "epoch": 0.8329693554068334, "grad_norm": 0.9998289802426703, "learning_rate": 1.4269692615377461e-06, "loss": 0.848, "step": 5912 }, { "epoch": 0.8331102500880592, "grad_norm": 1.1457516111681876, "learning_rate": 1.4246207503270405e-06, "loss": 0.848, "step": 5913 }, { "epoch": 0.833251144769285, "grad_norm": 1.0823918060770383, "learning_rate": 1.4222740250747801e-06, "loss": 0.8981, "step": 5914 }, { "epoch": 0.8333920394505108, "grad_norm": 1.1499976987628089, "learning_rate": 1.4199290862697169e-06, "loss": 0.8933, "step": 5915 }, { "epoch": 0.8335329341317366, "grad_norm": 0.9861586466245653, "learning_rate": 1.4175859344002108e-06, "loss": 0.7709, "step": 5916 }, { "epoch": 0.8336738288129623, "grad_norm": 1.2887931695535586, "learning_rate": 1.4152445699542671e-06, "loss": 0.3868, "step": 5917 }, { "epoch": 0.8338147234941881, "grad_norm": 1.026029543486005, "learning_rate": 1.4129049934195071e-06, "loss": 0.786, "step": 5918 }, { "epoch": 0.8339556181754139, "grad_norm": 1.3280508175557737, "learning_rate": 1.4105672052831909e-06, "loss": 0.4704, "step": 5919 }, { "epoch": 0.8340965128566397, "grad_norm": 1.0905761431819279, "learning_rate": 1.4082312060321955e-06, "loss": 0.4669, "step": 5920 }, { "epoch": 0.8342374075378655, "grad_norm": 1.1175438045785269, "learning_rate": 1.40589699615303e-06, "loss": 0.8564, "step": 5921 }, { "epoch": 0.8343783022190913, "grad_norm": 0.9444352732407719, "learning_rate": 1.4035645761318329e-06, "loss": 0.8383, "step": 5922 }, { "epoch": 0.8345191969003171, "grad_norm": 1.2325935949071578, "learning_rate": 1.4012339464543624e-06, "loss": 0.8082, "step": 5923 }, { "epoch": 0.8346600915815428, "grad_norm": 1.08585648530843, "learning_rate": 1.3989051076060144e-06, "loss": 0.8355, "step": 5924 }, { "epoch": 0.8348009862627686, "grad_norm": 0.944212623296333, "learning_rate": 1.3965780600718059e-06, "loss": 0.7856, "step": 5925 }, { "epoch": 0.8349418809439944, "grad_norm": 1.1545913134137458, "learning_rate": 1.394252804336379e-06, "loss": 0.8312, "step": 5926 }, { "epoch": 0.8350827756252202, "grad_norm": 0.9519332123099215, "learning_rate": 1.391929340884004e-06, "loss": 0.8119, "step": 5927 }, { "epoch": 0.835223670306446, "grad_norm": 1.0585405050726002, "learning_rate": 1.3896076701985827e-06, "loss": 0.7993, "step": 5928 }, { "epoch": 0.8353645649876718, "grad_norm": 1.2325030990331691, "learning_rate": 1.3872877927636364e-06, "loss": 0.4425, "step": 5929 }, { "epoch": 0.8355054596688976, "grad_norm": 1.2128814510490762, "learning_rate": 1.3849697090623204e-06, "loss": 0.4994, "step": 5930 }, { "epoch": 0.8356463543501232, "grad_norm": 0.9417877537331695, "learning_rate": 1.3826534195774088e-06, "loss": 0.7619, "step": 5931 }, { "epoch": 0.835787249031349, "grad_norm": 0.9842490757488178, "learning_rate": 1.380338924791308e-06, "loss": 0.7852, "step": 5932 }, { "epoch": 0.8359281437125748, "grad_norm": 1.1274912287305487, "learning_rate": 1.3780262251860433e-06, "loss": 0.8272, "step": 5933 }, { "epoch": 0.8360690383938006, "grad_norm": 1.168170450842724, "learning_rate": 1.3757153212432772e-06, "loss": 0.3939, "step": 5934 }, { "epoch": 0.8362099330750264, "grad_norm": 1.3919818058279152, "learning_rate": 1.3734062134442894e-06, "loss": 0.5355, "step": 5935 }, { "epoch": 0.8363508277562522, "grad_norm": 1.2654714633539321, "learning_rate": 1.3710989022699872e-06, "loss": 0.4886, "step": 5936 }, { "epoch": 0.8364917224374779, "grad_norm": 0.9881915187320273, "learning_rate": 1.3687933882009042e-06, "loss": 0.8018, "step": 5937 }, { "epoch": 0.8366326171187037, "grad_norm": 1.0513514318325372, "learning_rate": 1.3664896717172028e-06, "loss": 0.8473, "step": 5938 }, { "epoch": 0.8367735117999295, "grad_norm": 1.0428765614089381, "learning_rate": 1.3641877532986659e-06, "loss": 0.8745, "step": 5939 }, { "epoch": 0.8369144064811553, "grad_norm": 1.2241306407234704, "learning_rate": 1.3618876334247067e-06, "loss": 0.8594, "step": 5940 }, { "epoch": 0.8370553011623811, "grad_norm": 1.1004535275251637, "learning_rate": 1.3595893125743608e-06, "loss": 0.7975, "step": 5941 }, { "epoch": 0.8371961958436069, "grad_norm": 1.138023496503658, "learning_rate": 1.35729279122629e-06, "loss": 0.4224, "step": 5942 }, { "epoch": 0.8373370905248327, "grad_norm": 1.2093640277177757, "learning_rate": 1.3549980698587784e-06, "loss": 0.4225, "step": 5943 }, { "epoch": 0.8374779852060584, "grad_norm": 1.0079957899916647, "learning_rate": 1.3527051489497423e-06, "loss": 0.8294, "step": 5944 }, { "epoch": 0.8376188798872842, "grad_norm": 1.0263712865153376, "learning_rate": 1.3504140289767153e-06, "loss": 0.9114, "step": 5945 }, { "epoch": 0.83775977456851, "grad_norm": 1.0732575657578407, "learning_rate": 1.3481247104168648e-06, "loss": 0.4783, "step": 5946 }, { "epoch": 0.8379006692497358, "grad_norm": 1.119736853501494, "learning_rate": 1.3458371937469704e-06, "loss": 0.8341, "step": 5947 }, { "epoch": 0.8380415639309616, "grad_norm": 1.0884557091516305, "learning_rate": 1.3435514794434478e-06, "loss": 0.7914, "step": 5948 }, { "epoch": 0.8381824586121874, "grad_norm": 1.032430814634617, "learning_rate": 1.3412675679823318e-06, "loss": 0.3763, "step": 5949 }, { "epoch": 0.8383233532934131, "grad_norm": 0.953518523742405, "learning_rate": 1.3389854598392848e-06, "loss": 0.8304, "step": 5950 }, { "epoch": 0.8384642479746389, "grad_norm": 1.365327479862543, "learning_rate": 1.3367051554895916e-06, "loss": 0.4854, "step": 5951 }, { "epoch": 0.8386051426558647, "grad_norm": 1.0602105395933132, "learning_rate": 1.334426655408161e-06, "loss": 0.8648, "step": 5952 }, { "epoch": 0.8387460373370905, "grad_norm": 0.9487644853419124, "learning_rate": 1.3321499600695241e-06, "loss": 0.7847, "step": 5953 }, { "epoch": 0.8388869320183163, "grad_norm": 1.0882792630649518, "learning_rate": 1.3298750699478435e-06, "loss": 0.8344, "step": 5954 }, { "epoch": 0.8390278266995421, "grad_norm": 1.303429827206259, "learning_rate": 1.3276019855168965e-06, "loss": 0.6154, "step": 5955 }, { "epoch": 0.8391687213807679, "grad_norm": 1.075359361494074, "learning_rate": 1.3253307072500966e-06, "loss": 0.8733, "step": 5956 }, { "epoch": 0.8393096160619936, "grad_norm": 1.089236939091016, "learning_rate": 1.323061235620463e-06, "loss": 0.8663, "step": 5957 }, { "epoch": 0.8394505107432194, "grad_norm": 1.1304535042443473, "learning_rate": 1.3207935711006559e-06, "loss": 0.8922, "step": 5958 }, { "epoch": 0.8395914054244452, "grad_norm": 1.1355726376820237, "learning_rate": 1.3185277141629493e-06, "loss": 0.4977, "step": 5959 }, { "epoch": 0.839732300105671, "grad_norm": 1.0096079320578437, "learning_rate": 1.316263665279247e-06, "loss": 0.8622, "step": 5960 }, { "epoch": 0.8398731947868968, "grad_norm": 0.9104635699670643, "learning_rate": 1.314001424921072e-06, "loss": 0.7914, "step": 5961 }, { "epoch": 0.8400140894681226, "grad_norm": 1.0905030497684551, "learning_rate": 1.3117409935595693e-06, "loss": 0.4341, "step": 5962 }, { "epoch": 0.8401549841493484, "grad_norm": 1.1909164864034067, "learning_rate": 1.3094823716655102e-06, "loss": 0.4703, "step": 5963 }, { "epoch": 0.8402958788305741, "grad_norm": 1.1376881944327912, "learning_rate": 1.3072255597092908e-06, "loss": 0.8455, "step": 5964 }, { "epoch": 0.8404367735117999, "grad_norm": 1.0699511500252143, "learning_rate": 1.3049705581609262e-06, "loss": 0.8441, "step": 5965 }, { "epoch": 0.8405776681930257, "grad_norm": 0.9054695369083593, "learning_rate": 1.3027173674900562e-06, "loss": 0.8099, "step": 5966 }, { "epoch": 0.8407185628742515, "grad_norm": 0.9378237568736246, "learning_rate": 1.3004659881659443e-06, "loss": 0.8256, "step": 5967 }, { "epoch": 0.8408594575554773, "grad_norm": 1.1757242188640387, "learning_rate": 1.298216420657472e-06, "loss": 0.8589, "step": 5968 }, { "epoch": 0.8410003522367031, "grad_norm": 1.042523444492046, "learning_rate": 1.2959686654331526e-06, "loss": 0.8402, "step": 5969 }, { "epoch": 0.8411412469179288, "grad_norm": 1.0365310724421808, "learning_rate": 1.2937227229611127e-06, "loss": 0.8253, "step": 5970 }, { "epoch": 0.8412821415991546, "grad_norm": 0.9863264930987822, "learning_rate": 1.2914785937091101e-06, "loss": 0.8504, "step": 5971 }, { "epoch": 0.8414230362803804, "grad_norm": 1.2396750756646508, "learning_rate": 1.2892362781445167e-06, "loss": 0.5395, "step": 5972 }, { "epoch": 0.8415639309616062, "grad_norm": 0.9500417103173564, "learning_rate": 1.286995776734331e-06, "loss": 0.8414, "step": 5973 }, { "epoch": 0.841704825642832, "grad_norm": 1.0495230499620316, "learning_rate": 1.28475708994517e-06, "loss": 0.9053, "step": 5974 }, { "epoch": 0.8418457203240578, "grad_norm": 1.0477459202523003, "learning_rate": 1.2825202182432817e-06, "loss": 0.8835, "step": 5975 }, { "epoch": 0.8419866150052836, "grad_norm": 1.3850055430295625, "learning_rate": 1.2802851620945234e-06, "loss": 0.513, "step": 5976 }, { "epoch": 0.8421275096865093, "grad_norm": 0.9991296125922189, "learning_rate": 1.2780519219643894e-06, "loss": 0.8961, "step": 5977 }, { "epoch": 0.8422684043677351, "grad_norm": 1.114488215912527, "learning_rate": 1.2758204983179778e-06, "loss": 0.8199, "step": 5978 }, { "epoch": 0.8424092990489609, "grad_norm": 0.9419400495231534, "learning_rate": 1.2735908916200235e-06, "loss": 0.7851, "step": 5979 }, { "epoch": 0.8425501937301867, "grad_norm": 0.9386548285425028, "learning_rate": 1.271363102334875e-06, "loss": 0.8426, "step": 5980 }, { "epoch": 0.8426910884114125, "grad_norm": 1.1615706352149602, "learning_rate": 1.2691371309265065e-06, "loss": 0.5232, "step": 5981 }, { "epoch": 0.8428319830926383, "grad_norm": 1.1335618029419643, "learning_rate": 1.266912977858512e-06, "loss": 0.9166, "step": 5982 }, { "epoch": 0.842972877773864, "grad_norm": 0.9468866830764563, "learning_rate": 1.2646906435941063e-06, "loss": 0.783, "step": 5983 }, { "epoch": 0.8431137724550898, "grad_norm": 1.0172726000959695, "learning_rate": 1.2624701285961228e-06, "loss": 0.8071, "step": 5984 }, { "epoch": 0.8432546671363156, "grad_norm": 1.1992140512356446, "learning_rate": 1.2602514333270222e-06, "loss": 0.8288, "step": 5985 }, { "epoch": 0.8433955618175414, "grad_norm": 0.9872953483338622, "learning_rate": 1.2580345582488795e-06, "loss": 0.8781, "step": 5986 }, { "epoch": 0.8435364564987672, "grad_norm": 1.062421393117502, "learning_rate": 1.2558195038234022e-06, "loss": 0.8813, "step": 5987 }, { "epoch": 0.843677351179993, "grad_norm": 1.093910711834524, "learning_rate": 1.2536062705118989e-06, "loss": 0.8388, "step": 5988 }, { "epoch": 0.8438182458612188, "grad_norm": 1.4360294287497297, "learning_rate": 1.2513948587753189e-06, "loss": 0.5513, "step": 5989 }, { "epoch": 0.8439591405424445, "grad_norm": 0.9230039615245418, "learning_rate": 1.249185269074219e-06, "loss": 0.7992, "step": 5990 }, { "epoch": 0.8441000352236703, "grad_norm": 1.0496240711704448, "learning_rate": 1.2469775018687846e-06, "loss": 0.8451, "step": 5991 }, { "epoch": 0.8442409299048961, "grad_norm": 0.9936049864296311, "learning_rate": 1.2447715576188168e-06, "loss": 0.8513, "step": 5992 }, { "epoch": 0.8443818245861219, "grad_norm": 1.0542551234347306, "learning_rate": 1.2425674367837393e-06, "loss": 0.8458, "step": 5993 }, { "epoch": 0.8445227192673477, "grad_norm": 0.9916787408365192, "learning_rate": 1.2403651398225913e-06, "loss": 0.8132, "step": 5994 }, { "epoch": 0.8446636139485735, "grad_norm": 1.1392181577850515, "learning_rate": 1.2381646671940418e-06, "loss": 0.4493, "step": 5995 }, { "epoch": 0.8448045086297993, "grad_norm": 0.9906497957504421, "learning_rate": 1.235966019356367e-06, "loss": 0.8906, "step": 5996 }, { "epoch": 0.844945403311025, "grad_norm": 1.1762019312240755, "learning_rate": 1.2337691967674781e-06, "loss": 0.826, "step": 5997 }, { "epoch": 0.8450862979922508, "grad_norm": 1.1635813734552474, "learning_rate": 1.231574199884894e-06, "loss": 0.9216, "step": 5998 }, { "epoch": 0.8452271926734766, "grad_norm": 1.0170780058217916, "learning_rate": 1.2293810291657581e-06, "loss": 0.8291, "step": 5999 }, { "epoch": 0.8453680873547024, "grad_norm": 1.302035127621289, "learning_rate": 1.2271896850668297e-06, "loss": 0.4951, "step": 6000 }, { "epoch": 0.8455089820359282, "grad_norm": 1.391790167461859, "learning_rate": 1.2250001680444967e-06, "loss": 0.5545, "step": 6001 }, { "epoch": 0.845649876717154, "grad_norm": 1.0081264730392328, "learning_rate": 1.222812478554758e-06, "loss": 0.8335, "step": 6002 }, { "epoch": 0.8457907713983797, "grad_norm": 1.1904258288515692, "learning_rate": 1.2206266170532332e-06, "loss": 0.4367, "step": 6003 }, { "epoch": 0.8459316660796055, "grad_norm": 1.2586119646431497, "learning_rate": 1.218442583995163e-06, "loss": 0.4295, "step": 6004 }, { "epoch": 0.8460725607608313, "grad_norm": 1.2593241805422448, "learning_rate": 1.216260379835409e-06, "loss": 0.429, "step": 6005 }, { "epoch": 0.8462134554420571, "grad_norm": 0.9239190045481793, "learning_rate": 1.2140800050284484e-06, "loss": 0.7755, "step": 6006 }, { "epoch": 0.8463543501232829, "grad_norm": 1.204591724725488, "learning_rate": 1.2119014600283762e-06, "loss": 0.5535, "step": 6007 }, { "epoch": 0.8464952448045087, "grad_norm": 0.9985773406140335, "learning_rate": 1.209724745288916e-06, "loss": 0.829, "step": 6008 }, { "epoch": 0.8466361394857345, "grad_norm": 1.1472216652263285, "learning_rate": 1.207549861263395e-06, "loss": 0.4332, "step": 6009 }, { "epoch": 0.8467770341669602, "grad_norm": 1.2239711596551976, "learning_rate": 1.2053768084047723e-06, "loss": 0.4946, "step": 6010 }, { "epoch": 0.846917928848186, "grad_norm": 0.9989915898374871, "learning_rate": 1.2032055871656168e-06, "loss": 0.798, "step": 6011 }, { "epoch": 0.8470588235294118, "grad_norm": 1.0620696520743653, "learning_rate": 1.2010361979981232e-06, "loss": 0.8723, "step": 6012 }, { "epoch": 0.8471997182106376, "grad_norm": 1.2501160135509926, "learning_rate": 1.1988686413541006e-06, "loss": 0.4889, "step": 6013 }, { "epoch": 0.8473406128918634, "grad_norm": 1.2585826440068368, "learning_rate": 1.1967029176849754e-06, "loss": 0.5479, "step": 6014 }, { "epoch": 0.8474815075730892, "grad_norm": 1.0383062881235028, "learning_rate": 1.1945390274417933e-06, "loss": 0.7817, "step": 6015 }, { "epoch": 0.847622402254315, "grad_norm": 1.4130015051036735, "learning_rate": 1.192376971075222e-06, "loss": 0.6109, "step": 6016 }, { "epoch": 0.8477632969355406, "grad_norm": 0.8976166167479428, "learning_rate": 1.1902167490355387e-06, "loss": 0.8075, "step": 6017 }, { "epoch": 0.8479041916167664, "grad_norm": 1.0995569325271692, "learning_rate": 1.1880583617726526e-06, "loss": 0.9243, "step": 6018 }, { "epoch": 0.8480450862979922, "grad_norm": 1.083023140412171, "learning_rate": 1.1859018097360719e-06, "loss": 0.8517, "step": 6019 }, { "epoch": 0.848185980979218, "grad_norm": 1.0612841067068781, "learning_rate": 1.1837470933749383e-06, "loss": 0.8737, "step": 6020 }, { "epoch": 0.8483268756604438, "grad_norm": 1.0764748092008072, "learning_rate": 1.1815942131380042e-06, "loss": 0.8028, "step": 6021 }, { "epoch": 0.8484677703416696, "grad_norm": 1.0365973437354152, "learning_rate": 1.1794431694736419e-06, "loss": 0.8119, "step": 6022 }, { "epoch": 0.8486086650228953, "grad_norm": 1.045623259031043, "learning_rate": 1.1772939628298408e-06, "loss": 0.8484, "step": 6023 }, { "epoch": 0.8487495597041211, "grad_norm": 1.0022413401135808, "learning_rate": 1.175146593654205e-06, "loss": 0.8205, "step": 6024 }, { "epoch": 0.8488904543853469, "grad_norm": 1.2041922674971324, "learning_rate": 1.173001062393958e-06, "loss": 0.499, "step": 6025 }, { "epoch": 0.8490313490665727, "grad_norm": 0.9547698778171171, "learning_rate": 1.1708573694959434e-06, "loss": 0.8544, "step": 6026 }, { "epoch": 0.8491722437477985, "grad_norm": 0.9385707715580429, "learning_rate": 1.1687155154066154e-06, "loss": 0.8138, "step": 6027 }, { "epoch": 0.8493131384290243, "grad_norm": 0.8982745755454985, "learning_rate": 1.1665755005720548e-06, "loss": 0.7943, "step": 6028 }, { "epoch": 0.8494540331102501, "grad_norm": 1.0663219413155758, "learning_rate": 1.1644373254379482e-06, "loss": 0.7958, "step": 6029 }, { "epoch": 0.8495949277914758, "grad_norm": 1.1306100841939613, "learning_rate": 1.1623009904496075e-06, "loss": 0.8818, "step": 6030 }, { "epoch": 0.8497358224727016, "grad_norm": 0.9997652247684283, "learning_rate": 1.1601664960519544e-06, "loss": 0.8984, "step": 6031 }, { "epoch": 0.8498767171539274, "grad_norm": 1.0516838185730648, "learning_rate": 1.1580338426895355e-06, "loss": 0.8264, "step": 6032 }, { "epoch": 0.8500176118351532, "grad_norm": 1.0613972164496392, "learning_rate": 1.1559030308065077e-06, "loss": 0.8774, "step": 6033 }, { "epoch": 0.850158506516379, "grad_norm": 0.9533094097605272, "learning_rate": 1.1537740608466463e-06, "loss": 0.8703, "step": 6034 }, { "epoch": 0.8502994011976048, "grad_norm": 1.0888915944666884, "learning_rate": 1.151646933253342e-06, "loss": 0.825, "step": 6035 }, { "epoch": 0.8504402958788305, "grad_norm": 1.0819937551242067, "learning_rate": 1.1495216484696058e-06, "loss": 0.8708, "step": 6036 }, { "epoch": 0.8505811905600563, "grad_norm": 1.093087092357308, "learning_rate": 1.1473982069380573e-06, "loss": 0.8017, "step": 6037 }, { "epoch": 0.8507220852412821, "grad_norm": 1.0186239306795297, "learning_rate": 1.1452766091009416e-06, "loss": 0.8351, "step": 6038 }, { "epoch": 0.8508629799225079, "grad_norm": 1.0427711096471715, "learning_rate": 1.1431568554001138e-06, "loss": 0.8583, "step": 6039 }, { "epoch": 0.8510038746037337, "grad_norm": 1.0747550611348347, "learning_rate": 1.1410389462770444e-06, "loss": 0.8487, "step": 6040 }, { "epoch": 0.8511447692849595, "grad_norm": 0.9742494052300746, "learning_rate": 1.1389228821728204e-06, "loss": 0.7903, "step": 6041 }, { "epoch": 0.8512856639661853, "grad_norm": 1.1929965318121813, "learning_rate": 1.1368086635281505e-06, "loss": 0.4588, "step": 6042 }, { "epoch": 0.851426558647411, "grad_norm": 1.01708401085797, "learning_rate": 1.1346962907833503e-06, "loss": 0.8548, "step": 6043 }, { "epoch": 0.8515674533286368, "grad_norm": 1.0143583752219731, "learning_rate": 1.1325857643783567e-06, "loss": 0.8564, "step": 6044 }, { "epoch": 0.8517083480098626, "grad_norm": 0.9627197632517797, "learning_rate": 1.1304770847527168e-06, "loss": 0.8259, "step": 6045 }, { "epoch": 0.8518492426910884, "grad_norm": 0.966135660506404, "learning_rate": 1.1283702523456008e-06, "loss": 0.8121, "step": 6046 }, { "epoch": 0.8519901373723142, "grad_norm": 1.3447683919653823, "learning_rate": 1.1262652675957875e-06, "loss": 0.4808, "step": 6047 }, { "epoch": 0.85213103205354, "grad_norm": 1.032193930250685, "learning_rate": 1.124162130941675e-06, "loss": 0.8056, "step": 6048 }, { "epoch": 0.8522719267347658, "grad_norm": 1.1289187385552293, "learning_rate": 1.1220608428212764e-06, "loss": 0.8325, "step": 6049 }, { "epoch": 0.8524128214159915, "grad_norm": 1.0789250997915363, "learning_rate": 1.1199614036722106e-06, "loss": 0.8344, "step": 6050 }, { "epoch": 0.8525537160972173, "grad_norm": 1.0788852981971064, "learning_rate": 1.1178638139317265e-06, "loss": 0.8502, "step": 6051 }, { "epoch": 0.8526946107784431, "grad_norm": 1.03236474813836, "learning_rate": 1.115768074036676e-06, "loss": 0.8331, "step": 6052 }, { "epoch": 0.8528355054596689, "grad_norm": 1.3101741914800487, "learning_rate": 1.1136741844235344e-06, "loss": 0.5831, "step": 6053 }, { "epoch": 0.8529764001408947, "grad_norm": 1.0569867259260017, "learning_rate": 1.1115821455283849e-06, "loss": 0.9256, "step": 6054 }, { "epoch": 0.8531172948221205, "grad_norm": 1.280312063943803, "learning_rate": 1.1094919577869267e-06, "loss": 0.5135, "step": 6055 }, { "epoch": 0.8532581895033462, "grad_norm": 0.9931797363490406, "learning_rate": 1.1074036216344741e-06, "loss": 0.854, "step": 6056 }, { "epoch": 0.853399084184572, "grad_norm": 1.0162324718206979, "learning_rate": 1.1053171375059601e-06, "loss": 0.8714, "step": 6057 }, { "epoch": 0.8535399788657978, "grad_norm": 1.2057971441182147, "learning_rate": 1.1032325058359228e-06, "loss": 0.4403, "step": 6058 }, { "epoch": 0.8536808735470236, "grad_norm": 1.273240497639445, "learning_rate": 1.1011497270585247e-06, "loss": 0.5128, "step": 6059 }, { "epoch": 0.8538217682282494, "grad_norm": 1.0852124154266005, "learning_rate": 1.0990688016075357e-06, "loss": 0.8048, "step": 6060 }, { "epoch": 0.8539626629094752, "grad_norm": 1.0629961271306378, "learning_rate": 1.09698972991634e-06, "loss": 0.8476, "step": 6061 }, { "epoch": 0.854103557590701, "grad_norm": 1.1744819015365708, "learning_rate": 1.0949125124179372e-06, "loss": 0.4451, "step": 6062 }, { "epoch": 0.8542444522719267, "grad_norm": 1.0972815824035136, "learning_rate": 1.092837149544943e-06, "loss": 0.7942, "step": 6063 }, { "epoch": 0.8543853469531525, "grad_norm": 0.9787908288498173, "learning_rate": 1.0907636417295841e-06, "loss": 0.842, "step": 6064 }, { "epoch": 0.8545262416343783, "grad_norm": 1.1694881878348886, "learning_rate": 1.0886919894036996e-06, "loss": 0.5186, "step": 6065 }, { "epoch": 0.8546671363156041, "grad_norm": 1.1782698888468341, "learning_rate": 1.086622192998743e-06, "loss": 0.8393, "step": 6066 }, { "epoch": 0.8548080309968299, "grad_norm": 0.9424954365526911, "learning_rate": 1.0845542529457854e-06, "loss": 0.8453, "step": 6067 }, { "epoch": 0.8549489256780557, "grad_norm": 0.9747614785464889, "learning_rate": 1.0824881696755052e-06, "loss": 0.8666, "step": 6068 }, { "epoch": 0.8550898203592814, "grad_norm": 1.030530916037347, "learning_rate": 1.0804239436181996e-06, "loss": 0.8118, "step": 6069 }, { "epoch": 0.8552307150405072, "grad_norm": 1.3729501754108688, "learning_rate": 1.0783615752037758e-06, "loss": 0.5188, "step": 6070 }, { "epoch": 0.855371609721733, "grad_norm": 1.3309628101225217, "learning_rate": 1.0763010648617533e-06, "loss": 0.5291, "step": 6071 }, { "epoch": 0.8555125044029588, "grad_norm": 1.1921710995365722, "learning_rate": 1.0742424130212648e-06, "loss": 0.4431, "step": 6072 }, { "epoch": 0.8556533990841846, "grad_norm": 1.1530995391169219, "learning_rate": 1.0721856201110602e-06, "loss": 0.586, "step": 6073 }, { "epoch": 0.8557942937654104, "grad_norm": 1.029017604959429, "learning_rate": 1.0701306865594986e-06, "loss": 0.7869, "step": 6074 }, { "epoch": 0.8559351884466362, "grad_norm": 1.2270012506087389, "learning_rate": 1.0680776127945524e-06, "loss": 0.485, "step": 6075 }, { "epoch": 0.8560760831278619, "grad_norm": 0.950727456384682, "learning_rate": 1.0660263992438025e-06, "loss": 0.8133, "step": 6076 }, { "epoch": 0.8562169778090877, "grad_norm": 0.9113759542998892, "learning_rate": 1.0639770463344533e-06, "loss": 0.8217, "step": 6077 }, { "epoch": 0.8563578724903135, "grad_norm": 1.065047222176384, "learning_rate": 1.0619295544933095e-06, "loss": 0.7815, "step": 6078 }, { "epoch": 0.8564987671715393, "grad_norm": 1.271492728422913, "learning_rate": 1.0598839241467973e-06, "loss": 0.4654, "step": 6079 }, { "epoch": 0.8566396618527651, "grad_norm": 1.0176268606282226, "learning_rate": 1.0578401557209516e-06, "loss": 0.8411, "step": 6080 }, { "epoch": 0.8567805565339909, "grad_norm": 1.0028881262636036, "learning_rate": 1.0557982496414176e-06, "loss": 0.852, "step": 6081 }, { "epoch": 0.8569214512152167, "grad_norm": 1.0738066729227804, "learning_rate": 1.053758206333453e-06, "loss": 0.7875, "step": 6082 }, { "epoch": 0.8570623458964424, "grad_norm": 1.0210885626468742, "learning_rate": 1.0517200262219328e-06, "loss": 0.872, "step": 6083 }, { "epoch": 0.8572032405776682, "grad_norm": 0.9492479026134095, "learning_rate": 1.0496837097313373e-06, "loss": 0.862, "step": 6084 }, { "epoch": 0.857344135258894, "grad_norm": 1.0521163633740138, "learning_rate": 1.0476492572857667e-06, "loss": 0.8545, "step": 6085 }, { "epoch": 0.8574850299401198, "grad_norm": 1.0848941335833033, "learning_rate": 1.04561666930892e-06, "loss": 0.8288, "step": 6086 }, { "epoch": 0.8576259246213456, "grad_norm": 1.2832988796222742, "learning_rate": 1.0435859462241227e-06, "loss": 0.5051, "step": 6087 }, { "epoch": 0.8577668193025714, "grad_norm": 0.9655040646042189, "learning_rate": 1.0415570884543003e-06, "loss": 0.8922, "step": 6088 }, { "epoch": 0.8579077139837971, "grad_norm": 1.0118728863948816, "learning_rate": 1.0395300964219989e-06, "loss": 0.8187, "step": 6089 }, { "epoch": 0.8580486086650229, "grad_norm": 1.0765622006074078, "learning_rate": 1.0375049705493689e-06, "loss": 0.9176, "step": 6090 }, { "epoch": 0.8581895033462487, "grad_norm": 0.9982866882753288, "learning_rate": 1.035481711258175e-06, "loss": 0.8655, "step": 6091 }, { "epoch": 0.8583303980274745, "grad_norm": 1.176356821746777, "learning_rate": 1.0334603189697945e-06, "loss": 0.441, "step": 6092 }, { "epoch": 0.8584712927087003, "grad_norm": 1.2772229904923849, "learning_rate": 1.0314407941052095e-06, "loss": 0.557, "step": 6093 }, { "epoch": 0.8586121873899261, "grad_norm": 1.1322840317708074, "learning_rate": 1.029423137085025e-06, "loss": 0.4673, "step": 6094 }, { "epoch": 0.8587530820711519, "grad_norm": 1.20121061172875, "learning_rate": 1.0274073483294466e-06, "loss": 0.462, "step": 6095 }, { "epoch": 0.8588939767523776, "grad_norm": 1.1256973815640812, "learning_rate": 1.0253934282582934e-06, "loss": 0.7551, "step": 6096 }, { "epoch": 0.8590348714336034, "grad_norm": 1.0004112932294063, "learning_rate": 1.0233813772909962e-06, "loss": 0.8417, "step": 6097 }, { "epoch": 0.8591757661148292, "grad_norm": 0.9752062148133236, "learning_rate": 1.0213711958465977e-06, "loss": 0.8526, "step": 6098 }, { "epoch": 0.859316660796055, "grad_norm": 0.9838891613535985, "learning_rate": 1.0193628843437486e-06, "loss": 0.8177, "step": 6099 }, { "epoch": 0.8594575554772808, "grad_norm": 0.9716240740434238, "learning_rate": 1.0173564432007143e-06, "loss": 0.832, "step": 6100 }, { "epoch": 0.8595984501585066, "grad_norm": 1.4082103825102774, "learning_rate": 1.015351872835366e-06, "loss": 0.5753, "step": 6101 }, { "epoch": 0.8597393448397322, "grad_norm": 1.252037873676187, "learning_rate": 1.013349173665188e-06, "loss": 0.5334, "step": 6102 }, { "epoch": 0.859880239520958, "grad_norm": 1.0097756777701488, "learning_rate": 1.0113483461072704e-06, "loss": 0.8312, "step": 6103 }, { "epoch": 0.8600211342021838, "grad_norm": 1.2258389316063125, "learning_rate": 1.0093493905783236e-06, "loss": 0.4484, "step": 6104 }, { "epoch": 0.8601620288834096, "grad_norm": 1.1691806842731352, "learning_rate": 1.0073523074946566e-06, "loss": 0.4858, "step": 6105 }, { "epoch": 0.8603029235646354, "grad_norm": 0.942200039654108, "learning_rate": 1.005357097272197e-06, "loss": 0.7956, "step": 6106 }, { "epoch": 0.8604438182458612, "grad_norm": 0.9565686102902775, "learning_rate": 1.0033637603264745e-06, "loss": 0.7754, "step": 6107 }, { "epoch": 0.860584712927087, "grad_norm": 1.1814072667905746, "learning_rate": 1.001372297072638e-06, "loss": 0.5548, "step": 6108 }, { "epoch": 0.8607256076083127, "grad_norm": 0.9823268900176775, "learning_rate": 9.99382707925437e-07, "loss": 0.8557, "step": 6109 }, { "epoch": 0.8608665022895385, "grad_norm": 1.0864670112594736, "learning_rate": 9.973949932992389e-07, "loss": 0.8623, "step": 6110 }, { "epoch": 0.8610073969707643, "grad_norm": 1.0062391450659989, "learning_rate": 9.954091536080145e-07, "loss": 0.834, "step": 6111 }, { "epoch": 0.8611482916519901, "grad_norm": 1.088291514052288, "learning_rate": 9.934251892653467e-07, "loss": 0.8499, "step": 6112 }, { "epoch": 0.8612891863332159, "grad_norm": 1.0378011921919106, "learning_rate": 9.914431006844261e-07, "loss": 0.8302, "step": 6113 }, { "epoch": 0.8614300810144417, "grad_norm": 1.281619540058654, "learning_rate": 9.894628882780566e-07, "loss": 0.4744, "step": 6114 }, { "epoch": 0.8615709756956675, "grad_norm": 0.9660981847364293, "learning_rate": 9.874845524586463e-07, "loss": 0.904, "step": 6115 }, { "epoch": 0.8617118703768932, "grad_norm": 1.0565173947245239, "learning_rate": 9.85508093638219e-07, "loss": 0.8526, "step": 6116 }, { "epoch": 0.861852765058119, "grad_norm": 0.9785578128659146, "learning_rate": 9.835335122283974e-07, "loss": 0.8315, "step": 6117 }, { "epoch": 0.8619936597393448, "grad_norm": 1.0865894432840153, "learning_rate": 9.815608086404249e-07, "loss": 0.8466, "step": 6118 }, { "epoch": 0.8621345544205706, "grad_norm": 1.0137251570836305, "learning_rate": 9.79589983285143e-07, "loss": 0.8749, "step": 6119 }, { "epoch": 0.8622754491017964, "grad_norm": 1.0020059725352464, "learning_rate": 9.776210365730132e-07, "loss": 0.852, "step": 6120 }, { "epoch": 0.8624163437830222, "grad_norm": 1.2403218765215553, "learning_rate": 9.756539689140965e-07, "loss": 0.4848, "step": 6121 }, { "epoch": 0.8625572384642479, "grad_norm": 1.0651363281178023, "learning_rate": 9.73688780718066e-07, "loss": 0.8613, "step": 6122 }, { "epoch": 0.8626981331454737, "grad_norm": 1.0961036428497928, "learning_rate": 9.717254723942015e-07, "loss": 0.8324, "step": 6123 }, { "epoch": 0.8628390278266995, "grad_norm": 1.1616881046091303, "learning_rate": 9.697640443513978e-07, "loss": 0.8768, "step": 6124 }, { "epoch": 0.8629799225079253, "grad_norm": 1.1098158456709697, "learning_rate": 9.67804496998147e-07, "loss": 0.8724, "step": 6125 }, { "epoch": 0.8631208171891511, "grad_norm": 1.2693987987656632, "learning_rate": 9.65846830742565e-07, "loss": 0.5459, "step": 6126 }, { "epoch": 0.8632617118703769, "grad_norm": 1.3324705355832935, "learning_rate": 9.638910459923557e-07, "loss": 0.4853, "step": 6127 }, { "epoch": 0.8634026065516027, "grad_norm": 1.3461123925670764, "learning_rate": 9.619371431548508e-07, "loss": 0.5141, "step": 6128 }, { "epoch": 0.8635435012328284, "grad_norm": 1.2068356910742597, "learning_rate": 9.599851226369761e-07, "loss": 0.4196, "step": 6129 }, { "epoch": 0.8636843959140542, "grad_norm": 0.9345743751995643, "learning_rate": 9.580349848452752e-07, "loss": 0.827, "step": 6130 }, { "epoch": 0.86382529059528, "grad_norm": 1.0887814951004122, "learning_rate": 9.560867301858924e-07, "loss": 0.8903, "step": 6131 }, { "epoch": 0.8639661852765058, "grad_norm": 1.2796907237297883, "learning_rate": 9.541403590645836e-07, "loss": 0.5542, "step": 6132 }, { "epoch": 0.8641070799577316, "grad_norm": 1.131065022949197, "learning_rate": 9.521958718867108e-07, "loss": 0.8111, "step": 6133 }, { "epoch": 0.8642479746389574, "grad_norm": 1.0297402384227257, "learning_rate": 9.502532690572419e-07, "loss": 0.8494, "step": 6134 }, { "epoch": 0.8643888693201832, "grad_norm": 1.2467556781121891, "learning_rate": 9.483125509807601e-07, "loss": 0.4774, "step": 6135 }, { "epoch": 0.8645297640014089, "grad_norm": 1.241607778741772, "learning_rate": 9.463737180614474e-07, "loss": 0.5724, "step": 6136 }, { "epoch": 0.8646706586826347, "grad_norm": 0.9394123852847533, "learning_rate": 9.444367707030966e-07, "loss": 0.7555, "step": 6137 }, { "epoch": 0.8648115533638605, "grad_norm": 1.3952053640622701, "learning_rate": 9.42501709309106e-07, "loss": 0.4805, "step": 6138 }, { "epoch": 0.8649524480450863, "grad_norm": 1.0574103362217737, "learning_rate": 9.405685342824866e-07, "loss": 0.8578, "step": 6139 }, { "epoch": 0.8650933427263121, "grad_norm": 1.3399042439913014, "learning_rate": 9.386372460258486e-07, "loss": 0.5782, "step": 6140 }, { "epoch": 0.8652342374075379, "grad_norm": 1.0659828025793006, "learning_rate": 9.367078449414169e-07, "loss": 0.8068, "step": 6141 }, { "epoch": 0.8653751320887636, "grad_norm": 1.124362753799419, "learning_rate": 9.347803314310189e-07, "loss": 0.8657, "step": 6142 }, { "epoch": 0.8655160267699894, "grad_norm": 1.0214750393613352, "learning_rate": 9.328547058960891e-07, "loss": 0.7981, "step": 6143 }, { "epoch": 0.8656569214512152, "grad_norm": 0.9775388868198894, "learning_rate": 9.309309687376677e-07, "loss": 0.8019, "step": 6144 }, { "epoch": 0.865797816132441, "grad_norm": 1.008460672847482, "learning_rate": 9.290091203564067e-07, "loss": 0.8111, "step": 6145 }, { "epoch": 0.8659387108136668, "grad_norm": 1.032883086668588, "learning_rate": 9.270891611525589e-07, "loss": 0.8924, "step": 6146 }, { "epoch": 0.8660796054948926, "grad_norm": 1.1526096271703554, "learning_rate": 9.251710915259915e-07, "loss": 0.4752, "step": 6147 }, { "epoch": 0.8662205001761184, "grad_norm": 1.2723368667415194, "learning_rate": 9.232549118761636e-07, "loss": 0.4708, "step": 6148 }, { "epoch": 0.8663613948573441, "grad_norm": 1.1600808876874211, "learning_rate": 9.213406226021582e-07, "loss": 0.4701, "step": 6149 }, { "epoch": 0.8665022895385699, "grad_norm": 0.9522730740872075, "learning_rate": 9.194282241026509e-07, "loss": 0.8154, "step": 6150 }, { "epoch": 0.8666431842197957, "grad_norm": 1.3772677528631054, "learning_rate": 9.17517716775933e-07, "loss": 0.6624, "step": 6151 }, { "epoch": 0.8667840789010215, "grad_norm": 0.9988584004734089, "learning_rate": 9.156091010198953e-07, "loss": 0.8298, "step": 6152 }, { "epoch": 0.8669249735822473, "grad_norm": 1.032445785059875, "learning_rate": 9.137023772320397e-07, "loss": 0.8758, "step": 6153 }, { "epoch": 0.8670658682634731, "grad_norm": 0.987445654006482, "learning_rate": 9.117975458094674e-07, "loss": 0.8122, "step": 6154 }, { "epoch": 0.8672067629446988, "grad_norm": 1.0724668825243002, "learning_rate": 9.098946071488934e-07, "loss": 0.8118, "step": 6155 }, { "epoch": 0.8673476576259246, "grad_norm": 1.1750943036832153, "learning_rate": 9.079935616466329e-07, "loss": 0.7794, "step": 6156 }, { "epoch": 0.8674885523071504, "grad_norm": 1.1286587784616742, "learning_rate": 9.060944096986124e-07, "loss": 0.8856, "step": 6157 }, { "epoch": 0.8676294469883762, "grad_norm": 1.1508233677971007, "learning_rate": 9.041971517003545e-07, "loss": 0.8466, "step": 6158 }, { "epoch": 0.867770341669602, "grad_norm": 1.0954878532261796, "learning_rate": 9.023017880469976e-07, "loss": 0.8698, "step": 6159 }, { "epoch": 0.8679112363508278, "grad_norm": 0.9533581002356161, "learning_rate": 9.004083191332768e-07, "loss": 0.7725, "step": 6160 }, { "epoch": 0.8680521310320536, "grad_norm": 1.0038794965157325, "learning_rate": 8.985167453535437e-07, "loss": 0.869, "step": 6161 }, { "epoch": 0.8681930257132793, "grad_norm": 0.9927786158214995, "learning_rate": 8.96627067101743e-07, "loss": 0.8599, "step": 6162 }, { "epoch": 0.8683339203945051, "grad_norm": 0.9781900647951348, "learning_rate": 8.947392847714331e-07, "loss": 0.8049, "step": 6163 }, { "epoch": 0.8684748150757309, "grad_norm": 0.9155952445824995, "learning_rate": 8.928533987557697e-07, "loss": 0.8089, "step": 6164 }, { "epoch": 0.8686157097569567, "grad_norm": 1.2359352168210518, "learning_rate": 8.909694094475252e-07, "loss": 0.4464, "step": 6165 }, { "epoch": 0.8687566044381825, "grad_norm": 1.141669414764056, "learning_rate": 8.890873172390646e-07, "loss": 0.4659, "step": 6166 }, { "epoch": 0.8688974991194083, "grad_norm": 0.9388535975495551, "learning_rate": 8.872071225223677e-07, "loss": 0.8612, "step": 6167 }, { "epoch": 0.8690383938006341, "grad_norm": 1.1554403384208207, "learning_rate": 8.853288256890125e-07, "loss": 0.4754, "step": 6168 }, { "epoch": 0.8691792884818598, "grad_norm": 0.9190732115752581, "learning_rate": 8.834524271301847e-07, "loss": 0.7867, "step": 6169 }, { "epoch": 0.8693201831630856, "grad_norm": 1.1326932214417245, "learning_rate": 8.815779272366709e-07, "loss": 0.4646, "step": 6170 }, { "epoch": 0.8694610778443114, "grad_norm": 1.025604049867484, "learning_rate": 8.797053263988709e-07, "loss": 0.8023, "step": 6171 }, { "epoch": 0.8696019725255372, "grad_norm": 1.0696882344486476, "learning_rate": 8.778346250067804e-07, "loss": 0.7801, "step": 6172 }, { "epoch": 0.869742867206763, "grad_norm": 1.2959331265368006, "learning_rate": 8.759658234500023e-07, "loss": 0.4573, "step": 6173 }, { "epoch": 0.8698837618879888, "grad_norm": 1.2258495461441077, "learning_rate": 8.740989221177453e-07, "loss": 0.4543, "step": 6174 }, { "epoch": 0.8700246565692145, "grad_norm": 1.4231518343867793, "learning_rate": 8.722339213988185e-07, "loss": 0.5486, "step": 6175 }, { "epoch": 0.8701655512504403, "grad_norm": 1.0175822030942734, "learning_rate": 8.70370821681642e-07, "loss": 0.8001, "step": 6176 }, { "epoch": 0.8703064459316661, "grad_norm": 1.089921451140923, "learning_rate": 8.685096233542312e-07, "loss": 0.8475, "step": 6177 }, { "epoch": 0.8704473406128919, "grad_norm": 0.9259270082853412, "learning_rate": 8.66650326804217e-07, "loss": 0.7834, "step": 6178 }, { "epoch": 0.8705882352941177, "grad_norm": 1.0250867983537553, "learning_rate": 8.647929324188187e-07, "loss": 0.8207, "step": 6179 }, { "epoch": 0.8707291299753435, "grad_norm": 1.0458036934984176, "learning_rate": 8.629374405848734e-07, "loss": 0.8852, "step": 6180 }, { "epoch": 0.8708700246565693, "grad_norm": 1.0164450000473741, "learning_rate": 8.610838516888143e-07, "loss": 0.8794, "step": 6181 }, { "epoch": 0.871010919337795, "grad_norm": 1.0326044377754409, "learning_rate": 8.592321661166836e-07, "loss": 0.8647, "step": 6182 }, { "epoch": 0.8711518140190208, "grad_norm": 0.9237173849954566, "learning_rate": 8.573823842541218e-07, "loss": 0.7837, "step": 6183 }, { "epoch": 0.8712927087002466, "grad_norm": 0.9795726944489793, "learning_rate": 8.555345064863763e-07, "loss": 0.8693, "step": 6184 }, { "epoch": 0.8714336033814724, "grad_norm": 1.1593672552441558, "learning_rate": 8.536885331982947e-07, "loss": 0.444, "step": 6185 }, { "epoch": 0.8715744980626982, "grad_norm": 1.1701226370161888, "learning_rate": 8.51844464774334e-07, "loss": 0.8595, "step": 6186 }, { "epoch": 0.871715392743924, "grad_norm": 1.2951661659756264, "learning_rate": 8.500023015985459e-07, "loss": 0.4349, "step": 6187 }, { "epoch": 0.8718562874251496, "grad_norm": 1.0451999632342575, "learning_rate": 8.481620440545957e-07, "loss": 0.8143, "step": 6188 }, { "epoch": 0.8719971821063754, "grad_norm": 1.24987678317093, "learning_rate": 8.463236925257401e-07, "loss": 0.8554, "step": 6189 }, { "epoch": 0.8721380767876012, "grad_norm": 1.280232342618116, "learning_rate": 8.444872473948495e-07, "loss": 0.4683, "step": 6190 }, { "epoch": 0.872278971468827, "grad_norm": 1.182174023557119, "learning_rate": 8.426527090443893e-07, "loss": 0.4751, "step": 6191 }, { "epoch": 0.8724198661500528, "grad_norm": 0.9512078208289113, "learning_rate": 8.408200778564346e-07, "loss": 0.8318, "step": 6192 }, { "epoch": 0.8725607608312786, "grad_norm": 1.3045657895667533, "learning_rate": 8.389893542126592e-07, "loss": 0.4143, "step": 6193 }, { "epoch": 0.8727016555125044, "grad_norm": 1.120337475420429, "learning_rate": 8.371605384943382e-07, "loss": 0.8484, "step": 6194 }, { "epoch": 0.8728425501937301, "grad_norm": 0.9285392502401513, "learning_rate": 8.353336310823501e-07, "loss": 0.8238, "step": 6195 }, { "epoch": 0.8729834448749559, "grad_norm": 1.3264094266640816, "learning_rate": 8.335086323571817e-07, "loss": 0.5479, "step": 6196 }, { "epoch": 0.8731243395561817, "grad_norm": 1.0566324996437917, "learning_rate": 8.316855426989145e-07, "loss": 0.911, "step": 6197 }, { "epoch": 0.8732652342374075, "grad_norm": 1.0249308047260683, "learning_rate": 8.298643624872382e-07, "loss": 0.7893, "step": 6198 }, { "epoch": 0.8734061289186333, "grad_norm": 1.1099386774135298, "learning_rate": 8.280450921014416e-07, "loss": 0.8321, "step": 6199 }, { "epoch": 0.8735470235998591, "grad_norm": 1.1952664513558517, "learning_rate": 8.262277319204159e-07, "loss": 0.463, "step": 6200 }, { "epoch": 0.8736879182810849, "grad_norm": 1.03723557550655, "learning_rate": 8.24412282322653e-07, "loss": 0.8596, "step": 6201 }, { "epoch": 0.8738288129623106, "grad_norm": 1.2560990745708687, "learning_rate": 8.225987436862548e-07, "loss": 0.8265, "step": 6202 }, { "epoch": 0.8739697076435364, "grad_norm": 1.5443934213708956, "learning_rate": 8.207871163889148e-07, "loss": 0.6157, "step": 6203 }, { "epoch": 0.8741106023247622, "grad_norm": 1.2172204583783246, "learning_rate": 8.189774008079343e-07, "loss": 0.414, "step": 6204 }, { "epoch": 0.874251497005988, "grad_norm": 1.0041927591763236, "learning_rate": 8.171695973202143e-07, "loss": 0.8587, "step": 6205 }, { "epoch": 0.8743923916872138, "grad_norm": 1.0050706767197188, "learning_rate": 8.153637063022601e-07, "loss": 0.8639, "step": 6206 }, { "epoch": 0.8745332863684396, "grad_norm": 1.119522465212012, "learning_rate": 8.135597281301755e-07, "loss": 0.8581, "step": 6207 }, { "epoch": 0.8746741810496653, "grad_norm": 1.2259305815465134, "learning_rate": 8.117576631796697e-07, "loss": 0.4896, "step": 6208 }, { "epoch": 0.8748150757308911, "grad_norm": 0.9161176171025301, "learning_rate": 8.099575118260494e-07, "loss": 0.8058, "step": 6209 }, { "epoch": 0.8749559704121169, "grad_norm": 0.9376279660553992, "learning_rate": 8.081592744442257e-07, "loss": 0.8519, "step": 6210 }, { "epoch": 0.8750968650933427, "grad_norm": 0.9333178358248013, "learning_rate": 8.063629514087079e-07, "loss": 0.804, "step": 6211 }, { "epoch": 0.8752377597745685, "grad_norm": 1.0404570538125073, "learning_rate": 8.045685430936123e-07, "loss": 0.8454, "step": 6212 }, { "epoch": 0.8753786544557943, "grad_norm": 1.2877567421975007, "learning_rate": 8.0277604987265e-07, "loss": 0.4447, "step": 6213 }, { "epoch": 0.8755195491370201, "grad_norm": 0.919569415029567, "learning_rate": 8.009854721191368e-07, "loss": 0.8326, "step": 6214 }, { "epoch": 0.8756604438182458, "grad_norm": 1.1232527883557741, "learning_rate": 7.991968102059877e-07, "loss": 0.8644, "step": 6215 }, { "epoch": 0.8758013384994716, "grad_norm": 1.512513693892629, "learning_rate": 7.974100645057226e-07, "loss": 0.6058, "step": 6216 }, { "epoch": 0.8759422331806974, "grad_norm": 1.2325726874002478, "learning_rate": 7.95625235390457e-07, "loss": 0.5362, "step": 6217 }, { "epoch": 0.8760831278619232, "grad_norm": 0.9824856255817447, "learning_rate": 7.938423232319103e-07, "loss": 0.8382, "step": 6218 }, { "epoch": 0.876224022543149, "grad_norm": 1.2282933024525537, "learning_rate": 7.920613284014056e-07, "loss": 0.8674, "step": 6219 }, { "epoch": 0.8763649172243748, "grad_norm": 0.965758239248276, "learning_rate": 7.902822512698571e-07, "loss": 0.8156, "step": 6220 }, { "epoch": 0.8765058119056006, "grad_norm": 1.0542456619170035, "learning_rate": 7.885050922077908e-07, "loss": 0.7896, "step": 6221 }, { "epoch": 0.8766467065868263, "grad_norm": 1.1966296471063766, "learning_rate": 7.86729851585325e-07, "loss": 0.5481, "step": 6222 }, { "epoch": 0.8767876012680521, "grad_norm": 0.9399888533573245, "learning_rate": 7.849565297721851e-07, "loss": 0.8465, "step": 6223 }, { "epoch": 0.8769284959492779, "grad_norm": 1.013342645446278, "learning_rate": 7.831851271376912e-07, "loss": 0.8049, "step": 6224 }, { "epoch": 0.8770693906305037, "grad_norm": 0.9968839406739758, "learning_rate": 7.814156440507659e-07, "loss": 0.8379, "step": 6225 }, { "epoch": 0.8772102853117295, "grad_norm": 1.1830182400697686, "learning_rate": 7.796480808799301e-07, "loss": 0.8456, "step": 6226 }, { "epoch": 0.8773511799929553, "grad_norm": 0.9610963029276023, "learning_rate": 7.778824379933114e-07, "loss": 0.8298, "step": 6227 }, { "epoch": 0.877492074674181, "grad_norm": 1.137099288272247, "learning_rate": 7.761187157586281e-07, "loss": 0.4073, "step": 6228 }, { "epoch": 0.8776329693554068, "grad_norm": 1.0629213680524874, "learning_rate": 7.743569145432084e-07, "loss": 0.8083, "step": 6229 }, { "epoch": 0.8777738640366326, "grad_norm": 1.0714221926859155, "learning_rate": 7.725970347139722e-07, "loss": 0.8539, "step": 6230 }, { "epoch": 0.8779147587178584, "grad_norm": 1.2755120776122646, "learning_rate": 7.708390766374418e-07, "loss": 0.4986, "step": 6231 }, { "epoch": 0.8780556533990842, "grad_norm": 1.208069390316611, "learning_rate": 7.690830406797401e-07, "loss": 0.4086, "step": 6232 }, { "epoch": 0.87819654808031, "grad_norm": 0.967513232245671, "learning_rate": 7.673289272065909e-07, "loss": 0.8372, "step": 6233 }, { "epoch": 0.8783374427615358, "grad_norm": 1.2361651948884802, "learning_rate": 7.655767365833155e-07, "loss": 0.4912, "step": 6234 }, { "epoch": 0.8784783374427615, "grad_norm": 1.0289295229701008, "learning_rate": 7.638264691748354e-07, "loss": 0.8171, "step": 6235 }, { "epoch": 0.8786192321239873, "grad_norm": 1.000762111364874, "learning_rate": 7.620781253456688e-07, "loss": 0.8515, "step": 6236 }, { "epoch": 0.8787601268052131, "grad_norm": 1.2788942989129972, "learning_rate": 7.603317054599401e-07, "loss": 0.5143, "step": 6237 }, { "epoch": 0.8789010214864389, "grad_norm": 1.384011016803516, "learning_rate": 7.585872098813651e-07, "loss": 0.5258, "step": 6238 }, { "epoch": 0.8790419161676647, "grad_norm": 1.3438856797692864, "learning_rate": 7.568446389732664e-07, "loss": 0.4652, "step": 6239 }, { "epoch": 0.8791828108488905, "grad_norm": 0.9243740713226711, "learning_rate": 7.551039930985604e-07, "loss": 0.8189, "step": 6240 }, { "epoch": 0.8793237055301162, "grad_norm": 1.0606639591576204, "learning_rate": 7.533652726197638e-07, "loss": 0.8384, "step": 6241 }, { "epoch": 0.879464600211342, "grad_norm": 1.283556597295326, "learning_rate": 7.516284778989902e-07, "loss": 0.5052, "step": 6242 }, { "epoch": 0.8796054948925678, "grad_norm": 1.2266811994135198, "learning_rate": 7.498936092979592e-07, "loss": 0.5078, "step": 6243 }, { "epoch": 0.8797463895737936, "grad_norm": 1.0856757903297574, "learning_rate": 7.481606671779828e-07, "loss": 0.8614, "step": 6244 }, { "epoch": 0.8798872842550194, "grad_norm": 0.9507477757162889, "learning_rate": 7.464296518999736e-07, "loss": 0.819, "step": 6245 }, { "epoch": 0.8800281789362452, "grad_norm": 0.963650111476913, "learning_rate": 7.447005638244409e-07, "loss": 0.8241, "step": 6246 }, { "epoch": 0.880169073617471, "grad_norm": 1.2514727250314803, "learning_rate": 7.429734033114977e-07, "loss": 0.5077, "step": 6247 }, { "epoch": 0.8803099682986967, "grad_norm": 1.110409342217738, "learning_rate": 7.412481707208496e-07, "loss": 0.787, "step": 6248 }, { "epoch": 0.8804508629799225, "grad_norm": 0.9301256226424084, "learning_rate": 7.395248664118071e-07, "loss": 0.8105, "step": 6249 }, { "epoch": 0.8805917576611483, "grad_norm": 1.0938835621493512, "learning_rate": 7.378034907432741e-07, "loss": 0.7983, "step": 6250 }, { "epoch": 0.8807326523423741, "grad_norm": 1.0885285470652564, "learning_rate": 7.360840440737538e-07, "loss": 0.8207, "step": 6251 }, { "epoch": 0.8808735470235999, "grad_norm": 1.2924737666170432, "learning_rate": 7.343665267613465e-07, "loss": 0.4586, "step": 6252 }, { "epoch": 0.8810144417048257, "grad_norm": 1.2687627123149845, "learning_rate": 7.326509391637559e-07, "loss": 0.4185, "step": 6253 }, { "epoch": 0.8811553363860515, "grad_norm": 1.0616679104756837, "learning_rate": 7.309372816382776e-07, "loss": 0.8562, "step": 6254 }, { "epoch": 0.8812962310672772, "grad_norm": 1.056059273398587, "learning_rate": 7.292255545418114e-07, "loss": 0.8265, "step": 6255 }, { "epoch": 0.881437125748503, "grad_norm": 1.4218277584880183, "learning_rate": 7.275157582308456e-07, "loss": 0.6423, "step": 6256 }, { "epoch": 0.8815780204297288, "grad_norm": 1.2641341348069612, "learning_rate": 7.258078930614786e-07, "loss": 0.423, "step": 6257 }, { "epoch": 0.8817189151109546, "grad_norm": 0.9455739372306489, "learning_rate": 7.241019593893961e-07, "loss": 0.8423, "step": 6258 }, { "epoch": 0.8818598097921804, "grad_norm": 0.9953878167719771, "learning_rate": 7.22397957569887e-07, "loss": 0.823, "step": 6259 }, { "epoch": 0.8820007044734062, "grad_norm": 1.139602253474004, "learning_rate": 7.206958879578374e-07, "loss": 0.4004, "step": 6260 }, { "epoch": 0.8821415991546319, "grad_norm": 1.1775814023524436, "learning_rate": 7.189957509077305e-07, "loss": 0.4723, "step": 6261 }, { "epoch": 0.8822824938358577, "grad_norm": 1.105866763195492, "learning_rate": 7.172975467736454e-07, "loss": 0.8916, "step": 6262 }, { "epoch": 0.8824233885170835, "grad_norm": 1.1395956321843979, "learning_rate": 7.15601275909259e-07, "loss": 0.9193, "step": 6263 }, { "epoch": 0.8825642831983093, "grad_norm": 1.0986900309721137, "learning_rate": 7.139069386678499e-07, "loss": 0.8016, "step": 6264 }, { "epoch": 0.8827051778795351, "grad_norm": 0.9967845413633697, "learning_rate": 7.122145354022891e-07, "loss": 0.8117, "step": 6265 }, { "epoch": 0.8828460725607609, "grad_norm": 0.9867263298060746, "learning_rate": 7.105240664650459e-07, "loss": 0.8385, "step": 6266 }, { "epoch": 0.8829869672419867, "grad_norm": 1.0072336592496385, "learning_rate": 7.088355322081853e-07, "loss": 0.7682, "step": 6267 }, { "epoch": 0.8831278619232124, "grad_norm": 1.2728355806533735, "learning_rate": 7.071489329833758e-07, "loss": 0.569, "step": 6268 }, { "epoch": 0.8832687566044382, "grad_norm": 0.9335570804527445, "learning_rate": 7.054642691418734e-07, "loss": 0.8387, "step": 6269 }, { "epoch": 0.883409651285664, "grad_norm": 1.1489003789658196, "learning_rate": 7.037815410345405e-07, "loss": 0.8858, "step": 6270 }, { "epoch": 0.8835505459668898, "grad_norm": 1.0416153940216033, "learning_rate": 7.021007490118304e-07, "loss": 0.7955, "step": 6271 }, { "epoch": 0.8836914406481156, "grad_norm": 1.1284480919031292, "learning_rate": 7.004218934237949e-07, "loss": 0.8594, "step": 6272 }, { "epoch": 0.8838323353293414, "grad_norm": 1.0870535328252926, "learning_rate": 6.987449746200792e-07, "loss": 0.7688, "step": 6273 }, { "epoch": 0.883973230010567, "grad_norm": 1.0101605971031438, "learning_rate": 6.970699929499314e-07, "loss": 0.7968, "step": 6274 }, { "epoch": 0.8841141246917928, "grad_norm": 1.1581268595720438, "learning_rate": 6.953969487621937e-07, "loss": 0.4655, "step": 6275 }, { "epoch": 0.8842550193730186, "grad_norm": 0.9870825095917312, "learning_rate": 6.937258424053006e-07, "loss": 0.8447, "step": 6276 }, { "epoch": 0.8843959140542444, "grad_norm": 0.97691690901275, "learning_rate": 6.920566742272882e-07, "loss": 0.8333, "step": 6277 }, { "epoch": 0.8845368087354702, "grad_norm": 0.9722748083195484, "learning_rate": 6.903894445757875e-07, "loss": 0.8714, "step": 6278 }, { "epoch": 0.884677703416696, "grad_norm": 0.9851719091098208, "learning_rate": 6.887241537980249e-07, "loss": 0.7983, "step": 6279 }, { "epoch": 0.8848185980979218, "grad_norm": 1.0378460263887228, "learning_rate": 6.870608022408242e-07, "loss": 0.8515, "step": 6280 }, { "epoch": 0.8849594927791475, "grad_norm": 1.0929161838840846, "learning_rate": 6.853993902506051e-07, "loss": 0.8077, "step": 6281 }, { "epoch": 0.8851003874603733, "grad_norm": 1.2635341395103572, "learning_rate": 6.837399181733829e-07, "loss": 0.4497, "step": 6282 }, { "epoch": 0.8852412821415991, "grad_norm": 1.0340165230987926, "learning_rate": 6.820823863547654e-07, "loss": 0.8422, "step": 6283 }, { "epoch": 0.8853821768228249, "grad_norm": 1.2735702018243202, "learning_rate": 6.804267951399646e-07, "loss": 0.4408, "step": 6284 }, { "epoch": 0.8855230715040507, "grad_norm": 1.445569554080002, "learning_rate": 6.7877314487378e-07, "loss": 0.6481, "step": 6285 }, { "epoch": 0.8856639661852765, "grad_norm": 1.0121052886360469, "learning_rate": 6.771214359006151e-07, "loss": 0.8489, "step": 6286 }, { "epoch": 0.8858048608665023, "grad_norm": 1.1486695265907365, "learning_rate": 6.75471668564458e-07, "loss": 0.802, "step": 6287 }, { "epoch": 0.885945755547728, "grad_norm": 0.9929803650308165, "learning_rate": 6.73823843208905e-07, "loss": 0.8723, "step": 6288 }, { "epoch": 0.8860866502289538, "grad_norm": 0.9387811275996707, "learning_rate": 6.721779601771361e-07, "loss": 0.8736, "step": 6289 }, { "epoch": 0.8862275449101796, "grad_norm": 0.9508175880663391, "learning_rate": 6.705340198119381e-07, "loss": 0.8125, "step": 6290 }, { "epoch": 0.8863684395914054, "grad_norm": 1.0544259123043793, "learning_rate": 6.688920224556849e-07, "loss": 0.8286, "step": 6291 }, { "epoch": 0.8865093342726312, "grad_norm": 1.1847754361793659, "learning_rate": 6.672519684503476e-07, "loss": 0.5791, "step": 6292 }, { "epoch": 0.886650228953857, "grad_norm": 1.052131331426651, "learning_rate": 6.656138581374938e-07, "loss": 0.8073, "step": 6293 }, { "epoch": 0.8867911236350827, "grad_norm": 0.9691660122707209, "learning_rate": 6.639776918582875e-07, "loss": 0.7839, "step": 6294 }, { "epoch": 0.8869320183163085, "grad_norm": 1.1842965320339505, "learning_rate": 6.623434699534836e-07, "loss": 0.5041, "step": 6295 }, { "epoch": 0.8870729129975343, "grad_norm": 1.0654318484541856, "learning_rate": 6.607111927634391e-07, "loss": 0.8549, "step": 6296 }, { "epoch": 0.8872138076787601, "grad_norm": 0.9212051062129701, "learning_rate": 6.590808606280951e-07, "loss": 0.8013, "step": 6297 }, { "epoch": 0.8873547023599859, "grad_norm": 1.1261754530857255, "learning_rate": 6.574524738870003e-07, "loss": 0.8151, "step": 6298 }, { "epoch": 0.8874955970412117, "grad_norm": 1.1424194768421754, "learning_rate": 6.558260328792865e-07, "loss": 0.8184, "step": 6299 }, { "epoch": 0.8876364917224375, "grad_norm": 0.9274496183535528, "learning_rate": 6.542015379436906e-07, "loss": 0.7966, "step": 6300 }, { "epoch": 0.8877773864036632, "grad_norm": 1.4488400820315535, "learning_rate": 6.525789894185386e-07, "loss": 0.5462, "step": 6301 }, { "epoch": 0.887918281084889, "grad_norm": 1.3208919752352035, "learning_rate": 6.509583876417491e-07, "loss": 0.509, "step": 6302 }, { "epoch": 0.8880591757661148, "grad_norm": 0.9441590287219003, "learning_rate": 6.4933973295084e-07, "loss": 0.8245, "step": 6303 }, { "epoch": 0.8882000704473406, "grad_norm": 1.004059364405583, "learning_rate": 6.477230256829204e-07, "loss": 0.7796, "step": 6304 }, { "epoch": 0.8883409651285664, "grad_norm": 1.0416442843610627, "learning_rate": 6.461082661746975e-07, "loss": 0.8789, "step": 6305 }, { "epoch": 0.8884818598097922, "grad_norm": 1.0370023440576173, "learning_rate": 6.444954547624694e-07, "loss": 0.8594, "step": 6306 }, { "epoch": 0.888622754491018, "grad_norm": 1.3868089357169826, "learning_rate": 6.428845917821292e-07, "loss": 0.5841, "step": 6307 }, { "epoch": 0.8887636491722437, "grad_norm": 1.0467258218844233, "learning_rate": 6.412756775691642e-07, "loss": 0.8076, "step": 6308 }, { "epoch": 0.8889045438534695, "grad_norm": 1.0972298347242133, "learning_rate": 6.396687124586575e-07, "loss": 0.4126, "step": 6309 }, { "epoch": 0.8890454385346953, "grad_norm": 0.9350192480006967, "learning_rate": 6.380636967852827e-07, "loss": 0.8603, "step": 6310 }, { "epoch": 0.8891863332159211, "grad_norm": 1.067315755149047, "learning_rate": 6.364606308833133e-07, "loss": 0.8557, "step": 6311 }, { "epoch": 0.8893272278971469, "grad_norm": 1.0086699435427031, "learning_rate": 6.348595150866111e-07, "loss": 0.8273, "step": 6312 }, { "epoch": 0.8894681225783727, "grad_norm": 1.1377902895352094, "learning_rate": 6.332603497286338e-07, "loss": 0.8247, "step": 6313 }, { "epoch": 0.8896090172595984, "grad_norm": 1.0087373859000528, "learning_rate": 6.316631351424318e-07, "loss": 0.8017, "step": 6314 }, { "epoch": 0.8897499119408242, "grad_norm": 1.2578026030615999, "learning_rate": 6.300678716606534e-07, "loss": 0.4896, "step": 6315 }, { "epoch": 0.88989080662205, "grad_norm": 1.0944554071969956, "learning_rate": 6.284745596155339e-07, "loss": 0.8293, "step": 6316 }, { "epoch": 0.8900317013032758, "grad_norm": 1.3003212804086923, "learning_rate": 6.268831993389102e-07, "loss": 0.5322, "step": 6317 }, { "epoch": 0.8901725959845016, "grad_norm": 0.9711635990096815, "learning_rate": 6.252937911622036e-07, "loss": 0.834, "step": 6318 }, { "epoch": 0.8903134906657274, "grad_norm": 1.047879511528204, "learning_rate": 6.237063354164363e-07, "loss": 0.8836, "step": 6319 }, { "epoch": 0.8904543853469532, "grad_norm": 1.14157231031089, "learning_rate": 6.221208324322181e-07, "loss": 0.4068, "step": 6320 }, { "epoch": 0.8905952800281789, "grad_norm": 1.0419357318696543, "learning_rate": 6.205372825397593e-07, "loss": 0.8193, "step": 6321 }, { "epoch": 0.8907361747094047, "grad_norm": 1.5464353991708424, "learning_rate": 6.189556860688572e-07, "loss": 0.7788, "step": 6322 }, { "epoch": 0.8908770693906305, "grad_norm": 1.1586655508339037, "learning_rate": 6.173760433489051e-07, "loss": 0.9023, "step": 6323 }, { "epoch": 0.8910179640718563, "grad_norm": 1.042919601861331, "learning_rate": 6.157983547088853e-07, "loss": 0.8697, "step": 6324 }, { "epoch": 0.8911588587530821, "grad_norm": 0.9936673675085951, "learning_rate": 6.142226204773804e-07, "loss": 0.8963, "step": 6325 }, { "epoch": 0.8912997534343079, "grad_norm": 0.9566650316203319, "learning_rate": 6.126488409825593e-07, "loss": 0.8054, "step": 6326 }, { "epoch": 0.8914406481155336, "grad_norm": 1.3116541960649313, "learning_rate": 6.110770165521906e-07, "loss": 0.5238, "step": 6327 }, { "epoch": 0.8915815427967594, "grad_norm": 1.1700586682396634, "learning_rate": 6.095071475136249e-07, "loss": 0.4847, "step": 6328 }, { "epoch": 0.8917224374779852, "grad_norm": 1.363790368266701, "learning_rate": 6.079392341938183e-07, "loss": 0.4918, "step": 6329 }, { "epoch": 0.891863332159211, "grad_norm": 1.0697773071539498, "learning_rate": 6.063732769193098e-07, "loss": 0.8335, "step": 6330 }, { "epoch": 0.8920042268404368, "grad_norm": 0.9827511708962723, "learning_rate": 6.048092760162383e-07, "loss": 0.8947, "step": 6331 }, { "epoch": 0.8921451215216626, "grad_norm": 0.9226978317113961, "learning_rate": 6.032472318103288e-07, "loss": 0.8305, "step": 6332 }, { "epoch": 0.8922860162028884, "grad_norm": 1.3436125567352613, "learning_rate": 6.016871446269034e-07, "loss": 0.5471, "step": 6333 }, { "epoch": 0.8924269108841141, "grad_norm": 1.3278354052343408, "learning_rate": 6.001290147908723e-07, "loss": 0.5439, "step": 6334 }, { "epoch": 0.8925678055653399, "grad_norm": 1.0479792280307625, "learning_rate": 5.985728426267445e-07, "loss": 0.868, "step": 6335 }, { "epoch": 0.8927087002465657, "grad_norm": 0.9438891662665827, "learning_rate": 5.970186284586143e-07, "loss": 0.8732, "step": 6336 }, { "epoch": 0.8928495949277915, "grad_norm": 1.0115648959695251, "learning_rate": 5.954663726101739e-07, "loss": 0.7912, "step": 6337 }, { "epoch": 0.8929904896090173, "grad_norm": 0.9746106627795448, "learning_rate": 5.939160754047036e-07, "loss": 0.8078, "step": 6338 }, { "epoch": 0.8931313842902431, "grad_norm": 1.0778336617935518, "learning_rate": 5.923677371650783e-07, "loss": 0.8636, "step": 6339 }, { "epoch": 0.8932722789714689, "grad_norm": 1.176664276502124, "learning_rate": 5.908213582137611e-07, "loss": 0.4522, "step": 6340 }, { "epoch": 0.8934131736526946, "grad_norm": 0.8889210244037237, "learning_rate": 5.892769388728137e-07, "loss": 0.8131, "step": 6341 }, { "epoch": 0.8935540683339204, "grad_norm": 0.8867366609347594, "learning_rate": 5.87734479463885e-07, "loss": 0.7889, "step": 6342 }, { "epoch": 0.8936949630151462, "grad_norm": 1.0457119229804828, "learning_rate": 5.86193980308215e-07, "loss": 0.8279, "step": 6343 }, { "epoch": 0.893835857696372, "grad_norm": 0.9713355489077272, "learning_rate": 5.846554417266392e-07, "loss": 0.8937, "step": 6344 }, { "epoch": 0.8939767523775978, "grad_norm": 1.0160626232682306, "learning_rate": 5.831188640395791e-07, "loss": 0.8713, "step": 6345 }, { "epoch": 0.8941176470588236, "grad_norm": 0.9786296233886398, "learning_rate": 5.815842475670541e-07, "loss": 0.8487, "step": 6346 }, { "epoch": 0.8942585417400493, "grad_norm": 1.0006681948138996, "learning_rate": 5.800515926286721e-07, "loss": 0.8037, "step": 6347 }, { "epoch": 0.8943994364212751, "grad_norm": 1.0836135853130258, "learning_rate": 5.785208995436343e-07, "loss": 0.8368, "step": 6348 }, { "epoch": 0.8945403311025009, "grad_norm": 1.0019222265872174, "learning_rate": 5.769921686307267e-07, "loss": 0.8404, "step": 6349 }, { "epoch": 0.8946812257837267, "grad_norm": 1.1716252709032753, "learning_rate": 5.754654002083371e-07, "loss": 0.9448, "step": 6350 }, { "epoch": 0.8948221204649525, "grad_norm": 1.0244314364367755, "learning_rate": 5.739405945944354e-07, "loss": 0.8026, "step": 6351 }, { "epoch": 0.8949630151461783, "grad_norm": 0.9332366626972549, "learning_rate": 5.724177521065888e-07, "loss": 0.8048, "step": 6352 }, { "epoch": 0.8951039098274041, "grad_norm": 1.2067331737390177, "learning_rate": 5.708968730619535e-07, "loss": 0.4614, "step": 6353 }, { "epoch": 0.8952448045086298, "grad_norm": 1.0878407709700508, "learning_rate": 5.693779577772763e-07, "loss": 0.795, "step": 6354 }, { "epoch": 0.8953856991898556, "grad_norm": 1.0556795590273627, "learning_rate": 5.678610065688917e-07, "loss": 0.841, "step": 6355 }, { "epoch": 0.8955265938710814, "grad_norm": 0.9990861268202927, "learning_rate": 5.663460197527348e-07, "loss": 0.8199, "step": 6356 }, { "epoch": 0.8956674885523072, "grad_norm": 1.3069495187166196, "learning_rate": 5.648329976443212e-07, "loss": 0.6054, "step": 6357 }, { "epoch": 0.895808383233533, "grad_norm": 1.1638493940192316, "learning_rate": 5.633219405587653e-07, "loss": 0.8566, "step": 6358 }, { "epoch": 0.8959492779147588, "grad_norm": 1.0683643839016563, "learning_rate": 5.618128488107644e-07, "loss": 0.8369, "step": 6359 }, { "epoch": 0.8960901725959844, "grad_norm": 1.0013834433068767, "learning_rate": 5.603057227146136e-07, "loss": 0.8062, "step": 6360 }, { "epoch": 0.8962310672772102, "grad_norm": 1.4557941693129983, "learning_rate": 5.588005625841942e-07, "loss": 0.5207, "step": 6361 }, { "epoch": 0.896371961958436, "grad_norm": 1.1341156949072897, "learning_rate": 5.572973687329819e-07, "loss": 0.8368, "step": 6362 }, { "epoch": 0.8965128566396618, "grad_norm": 0.9524965842759804, "learning_rate": 5.557961414740398e-07, "loss": 0.8359, "step": 6363 }, { "epoch": 0.8966537513208876, "grad_norm": 0.9226239379296964, "learning_rate": 5.5429688112002e-07, "loss": 0.7473, "step": 6364 }, { "epoch": 0.8967946460021134, "grad_norm": 1.2994073967144901, "learning_rate": 5.527995879831694e-07, "loss": 0.5581, "step": 6365 }, { "epoch": 0.8969355406833392, "grad_norm": 1.2416910892734405, "learning_rate": 5.513042623753229e-07, "loss": 0.5475, "step": 6366 }, { "epoch": 0.8970764353645649, "grad_norm": 0.9203088781997663, "learning_rate": 5.498109046079037e-07, "loss": 0.8278, "step": 6367 }, { "epoch": 0.8972173300457907, "grad_norm": 1.1339379920918309, "learning_rate": 5.483195149919295e-07, "loss": 0.494, "step": 6368 }, { "epoch": 0.8973582247270165, "grad_norm": 1.003969940377465, "learning_rate": 5.468300938380056e-07, "loss": 0.7776, "step": 6369 }, { "epoch": 0.8974991194082423, "grad_norm": 1.1063748862302347, "learning_rate": 5.453426414563279e-07, "loss": 0.8233, "step": 6370 }, { "epoch": 0.8976400140894681, "grad_norm": 1.1447726569361452, "learning_rate": 5.438571581566788e-07, "loss": 0.47, "step": 6371 }, { "epoch": 0.8977809087706939, "grad_norm": 0.963663909014642, "learning_rate": 5.423736442484373e-07, "loss": 0.8521, "step": 6372 }, { "epoch": 0.8979218034519197, "grad_norm": 1.086521296448473, "learning_rate": 5.408921000405664e-07, "loss": 0.8762, "step": 6373 }, { "epoch": 0.8980626981331454, "grad_norm": 1.075365107866595, "learning_rate": 5.394125258416227e-07, "loss": 0.8366, "step": 6374 }, { "epoch": 0.8982035928143712, "grad_norm": 1.1399768504472398, "learning_rate": 5.379349219597496e-07, "loss": 0.8443, "step": 6375 }, { "epoch": 0.898344487495597, "grad_norm": 0.9952172954010138, "learning_rate": 5.36459288702682e-07, "loss": 0.794, "step": 6376 }, { "epoch": 0.8984853821768228, "grad_norm": 1.0112619995682979, "learning_rate": 5.349856263777431e-07, "loss": 0.8835, "step": 6377 }, { "epoch": 0.8986262768580486, "grad_norm": 0.9284228338741708, "learning_rate": 5.335139352918494e-07, "loss": 0.8718, "step": 6378 }, { "epoch": 0.8987671715392744, "grad_norm": 1.239286717467882, "learning_rate": 5.320442157515015e-07, "loss": 0.5782, "step": 6379 }, { "epoch": 0.8989080662205001, "grad_norm": 0.9746495607111563, "learning_rate": 5.305764680627912e-07, "loss": 0.794, "step": 6380 }, { "epoch": 0.8990489609017259, "grad_norm": 1.4589132950536456, "learning_rate": 5.291106925314005e-07, "loss": 0.5166, "step": 6381 }, { "epoch": 0.8991898555829517, "grad_norm": 1.0312729770806819, "learning_rate": 5.276468894626019e-07, "loss": 0.8419, "step": 6382 }, { "epoch": 0.8993307502641775, "grad_norm": 1.016112350375224, "learning_rate": 5.261850591612538e-07, "loss": 0.8518, "step": 6383 }, { "epoch": 0.8994716449454033, "grad_norm": 1.0918567270012252, "learning_rate": 5.247252019318073e-07, "loss": 0.8154, "step": 6384 }, { "epoch": 0.8996125396266291, "grad_norm": 1.2431748473987276, "learning_rate": 5.232673180782987e-07, "loss": 0.5882, "step": 6385 }, { "epoch": 0.8997534343078549, "grad_norm": 0.9603734867383705, "learning_rate": 5.218114079043545e-07, "loss": 0.8219, "step": 6386 }, { "epoch": 0.8998943289890806, "grad_norm": 1.2296735803504635, "learning_rate": 5.203574717131954e-07, "loss": 0.4103, "step": 6387 }, { "epoch": 0.9000352236703064, "grad_norm": 1.1443748856138714, "learning_rate": 5.189055098076223e-07, "loss": 0.8659, "step": 6388 }, { "epoch": 0.9001761183515322, "grad_norm": 1.1395790465565427, "learning_rate": 5.174555224900346e-07, "loss": 0.8612, "step": 6389 }, { "epoch": 0.900317013032758, "grad_norm": 1.2506210275507703, "learning_rate": 5.160075100624073e-07, "loss": 0.4967, "step": 6390 }, { "epoch": 0.9004579077139838, "grad_norm": 1.275322775068956, "learning_rate": 5.145614728263181e-07, "loss": 0.4887, "step": 6391 }, { "epoch": 0.9005988023952096, "grad_norm": 0.9896537589702251, "learning_rate": 5.131174110829241e-07, "loss": 0.8196, "step": 6392 }, { "epoch": 0.9007396970764354, "grad_norm": 1.1973380465256849, "learning_rate": 5.116753251329764e-07, "loss": 0.8568, "step": 6393 }, { "epoch": 0.9008805917576611, "grad_norm": 0.9682970379045309, "learning_rate": 5.102352152768108e-07, "loss": 0.8073, "step": 6394 }, { "epoch": 0.9010214864388869, "grad_norm": 1.8202471203678392, "learning_rate": 5.08797081814354e-07, "loss": 0.5416, "step": 6395 }, { "epoch": 0.9011623811201127, "grad_norm": 1.0592488835827494, "learning_rate": 5.073609250451172e-07, "loss": 0.8459, "step": 6396 }, { "epoch": 0.9013032758013385, "grad_norm": 1.3519497810875054, "learning_rate": 5.059267452682059e-07, "loss": 0.5809, "step": 6397 }, { "epoch": 0.9014441704825643, "grad_norm": 1.2569040812000267, "learning_rate": 5.044945427823078e-07, "loss": 0.4727, "step": 6398 }, { "epoch": 0.9015850651637901, "grad_norm": 0.9855579653416982, "learning_rate": 5.030643178857053e-07, "loss": 0.816, "step": 6399 }, { "epoch": 0.9017259598450158, "grad_norm": 1.0656828138395837, "learning_rate": 5.016360708762635e-07, "loss": 0.8736, "step": 6400 }, { "epoch": 0.9018668545262416, "grad_norm": 1.2666040140839625, "learning_rate": 5.002098020514378e-07, "loss": 0.6288, "step": 6401 }, { "epoch": 0.9020077492074674, "grad_norm": 1.3617472261322496, "learning_rate": 4.987855117082685e-07, "loss": 0.5991, "step": 6402 }, { "epoch": 0.9021486438886932, "grad_norm": 1.0370818266700481, "learning_rate": 4.973632001433915e-07, "loss": 0.8466, "step": 6403 }, { "epoch": 0.902289538569919, "grad_norm": 1.0008136211656748, "learning_rate": 4.959428676530232e-07, "loss": 0.8946, "step": 6404 }, { "epoch": 0.9024304332511448, "grad_norm": 0.9638919692539354, "learning_rate": 4.945245145329703e-07, "loss": 0.8603, "step": 6405 }, { "epoch": 0.9025713279323706, "grad_norm": 0.9673947656636568, "learning_rate": 4.931081410786264e-07, "loss": 0.8674, "step": 6406 }, { "epoch": 0.9027122226135963, "grad_norm": 1.103629677747465, "learning_rate": 4.916937475849748e-07, "loss": 0.4399, "step": 6407 }, { "epoch": 0.9028531172948221, "grad_norm": 1.268304298904123, "learning_rate": 4.902813343465851e-07, "loss": 0.4746, "step": 6408 }, { "epoch": 0.9029940119760479, "grad_norm": 0.9767136956993862, "learning_rate": 4.888709016576154e-07, "loss": 0.8812, "step": 6409 }, { "epoch": 0.9031349066572737, "grad_norm": 1.3182198389847062, "learning_rate": 4.874624498118108e-07, "loss": 0.4729, "step": 6410 }, { "epoch": 0.9032758013384995, "grad_norm": 1.0771199140414431, "learning_rate": 4.860559791025022e-07, "loss": 0.8054, "step": 6411 }, { "epoch": 0.9034166960197253, "grad_norm": 1.1840318285082294, "learning_rate": 4.846514898226084e-07, "loss": 0.8772, "step": 6412 }, { "epoch": 0.903557590700951, "grad_norm": 1.0114982936601904, "learning_rate": 4.832489822646402e-07, "loss": 0.7897, "step": 6413 }, { "epoch": 0.9036984853821768, "grad_norm": 1.1218478673728127, "learning_rate": 4.818484567206905e-07, "loss": 0.868, "step": 6414 }, { "epoch": 0.9038393800634026, "grad_norm": 1.2354019038861745, "learning_rate": 4.804499134824403e-07, "loss": 0.5073, "step": 6415 }, { "epoch": 0.9039802747446284, "grad_norm": 1.2403641105633325, "learning_rate": 4.790533528411567e-07, "loss": 0.4896, "step": 6416 }, { "epoch": 0.9041211694258542, "grad_norm": 0.9517779133955234, "learning_rate": 4.776587750876983e-07, "loss": 0.8675, "step": 6417 }, { "epoch": 0.90426206410708, "grad_norm": 1.1437807482721243, "learning_rate": 4.7626618051250684e-07, "loss": 0.879, "step": 6418 }, { "epoch": 0.9044029587883058, "grad_norm": 0.9859038204269386, "learning_rate": 4.7487556940561284e-07, "loss": 0.8856, "step": 6419 }, { "epoch": 0.9045438534695315, "grad_norm": 0.9700057225317384, "learning_rate": 4.7348694205663237e-07, "loss": 0.7874, "step": 6420 }, { "epoch": 0.9046847481507573, "grad_norm": 1.036048811312764, "learning_rate": 4.721002987547696e-07, "loss": 0.7849, "step": 6421 }, { "epoch": 0.9048256428319831, "grad_norm": 1.095821469375073, "learning_rate": 4.707156397888135e-07, "loss": 0.8579, "step": 6422 }, { "epoch": 0.9049665375132089, "grad_norm": 0.9391068309259114, "learning_rate": 4.6933296544714455e-07, "loss": 0.8474, "step": 6423 }, { "epoch": 0.9051074321944347, "grad_norm": 1.2381185493167262, "learning_rate": 4.679522760177213e-07, "loss": 0.4909, "step": 6424 }, { "epoch": 0.9052483268756605, "grad_norm": 0.9787730252081778, "learning_rate": 4.665735717881015e-07, "loss": 0.7928, "step": 6425 }, { "epoch": 0.9053892215568863, "grad_norm": 1.1583741658012734, "learning_rate": 4.651968530454154e-07, "loss": 0.4556, "step": 6426 }, { "epoch": 0.905530116238112, "grad_norm": 0.9409221744706816, "learning_rate": 4.638221200763882e-07, "loss": 0.8418, "step": 6427 }, { "epoch": 0.9056710109193378, "grad_norm": 0.9293299172575918, "learning_rate": 4.6244937316733185e-07, "loss": 0.8442, "step": 6428 }, { "epoch": 0.9058119056005636, "grad_norm": 1.0955091157885668, "learning_rate": 4.6107861260414e-07, "loss": 0.8116, "step": 6429 }, { "epoch": 0.9059528002817894, "grad_norm": 1.0665338154203832, "learning_rate": 4.597098386722976e-07, "loss": 0.7778, "step": 6430 }, { "epoch": 0.9060936949630152, "grad_norm": 1.11422298491873, "learning_rate": 4.583430516568721e-07, "loss": 0.8498, "step": 6431 }, { "epoch": 0.906234589644241, "grad_norm": 0.9876283466611232, "learning_rate": 4.5697825184251923e-07, "loss": 0.8137, "step": 6432 }, { "epoch": 0.9063754843254667, "grad_norm": 1.4629263219955582, "learning_rate": 4.5561543951347817e-07, "loss": 0.4491, "step": 6433 }, { "epoch": 0.9065163790066925, "grad_norm": 1.0742178576495545, "learning_rate": 4.5425461495357983e-07, "loss": 0.8323, "step": 6434 }, { "epoch": 0.9066572736879183, "grad_norm": 1.318549891598473, "learning_rate": 4.5289577844623514e-07, "loss": 0.4856, "step": 6435 }, { "epoch": 0.9067981683691441, "grad_norm": 1.0176372102279192, "learning_rate": 4.515389302744433e-07, "loss": 0.8721, "step": 6436 }, { "epoch": 0.9069390630503699, "grad_norm": 1.0789700887527067, "learning_rate": 4.501840707207883e-07, "loss": 0.7924, "step": 6437 }, { "epoch": 0.9070799577315957, "grad_norm": 1.049653796664585, "learning_rate": 4.488312000674444e-07, "loss": 0.8527, "step": 6438 }, { "epoch": 0.9072208524128215, "grad_norm": 1.0961360005511385, "learning_rate": 4.4748031859616405e-07, "loss": 0.8285, "step": 6439 }, { "epoch": 0.9073617470940472, "grad_norm": 1.0711950520643951, "learning_rate": 4.461314265882943e-07, "loss": 0.8323, "step": 6440 }, { "epoch": 0.907502641775273, "grad_norm": 0.9572022018655285, "learning_rate": 4.4478452432476063e-07, "loss": 0.8398, "step": 6441 }, { "epoch": 0.9076435364564988, "grad_norm": 1.0563630493383867, "learning_rate": 4.434396120860773e-07, "loss": 0.8308, "step": 6442 }, { "epoch": 0.9077844311377246, "grad_norm": 1.3061028339013474, "learning_rate": 4.4209669015234155e-07, "loss": 0.4391, "step": 6443 }, { "epoch": 0.9079253258189504, "grad_norm": 0.9743187635237991, "learning_rate": 4.407557588032418e-07, "loss": 0.7907, "step": 6444 }, { "epoch": 0.9080662205001762, "grad_norm": 1.0237037135638, "learning_rate": 4.3941681831804584e-07, "loss": 0.819, "step": 6445 }, { "epoch": 0.9082071151814018, "grad_norm": 1.0464994545851347, "learning_rate": 4.380798689756105e-07, "loss": 0.8267, "step": 6446 }, { "epoch": 0.9083480098626276, "grad_norm": 1.2502149945795566, "learning_rate": 4.367449110543731e-07, "loss": 0.3729, "step": 6447 }, { "epoch": 0.9084889045438534, "grad_norm": 1.3130573736370135, "learning_rate": 4.354119448323657e-07, "loss": 0.4535, "step": 6448 }, { "epoch": 0.9086297992250792, "grad_norm": 1.0197334535782185, "learning_rate": 4.34080970587194e-07, "loss": 0.8159, "step": 6449 }, { "epoch": 0.908770693906305, "grad_norm": 1.121295871377557, "learning_rate": 4.327519885960585e-07, "loss": 0.7598, "step": 6450 }, { "epoch": 0.9089115885875308, "grad_norm": 1.000586673940684, "learning_rate": 4.3142499913574e-07, "loss": 0.8101, "step": 6451 }, { "epoch": 0.9090524832687567, "grad_norm": 1.3050330106362955, "learning_rate": 4.30100002482603e-07, "loss": 0.5128, "step": 6452 }, { "epoch": 0.9091933779499823, "grad_norm": 1.0853143826278542, "learning_rate": 4.2877699891260004e-07, "loss": 0.8262, "step": 6453 }, { "epoch": 0.9093342726312081, "grad_norm": 1.1424097557193484, "learning_rate": 4.2745598870126747e-07, "loss": 0.471, "step": 6454 }, { "epoch": 0.9094751673124339, "grad_norm": 1.2939093962005759, "learning_rate": 4.261369721237274e-07, "loss": 0.608, "step": 6455 }, { "epoch": 0.9096160619936597, "grad_norm": 0.9614790472560433, "learning_rate": 4.2481994945468675e-07, "loss": 0.8248, "step": 6456 }, { "epoch": 0.9097569566748855, "grad_norm": 1.3243500844569227, "learning_rate": 4.235049209684328e-07, "loss": 0.5768, "step": 6457 }, { "epoch": 0.9098978513561113, "grad_norm": 1.2403038425400976, "learning_rate": 4.221918869388442e-07, "loss": 0.4731, "step": 6458 }, { "epoch": 0.9100387460373371, "grad_norm": 1.2929372486091388, "learning_rate": 4.2088084763937885e-07, "loss": 0.466, "step": 6459 }, { "epoch": 0.9101796407185628, "grad_norm": 2.0973115231545827, "learning_rate": 4.19571803343084e-07, "loss": 0.8782, "step": 6460 }, { "epoch": 0.9103205353997886, "grad_norm": 1.2703860861943652, "learning_rate": 4.1826475432258707e-07, "loss": 0.5307, "step": 6461 }, { "epoch": 0.9104614300810144, "grad_norm": 1.0534049373953176, "learning_rate": 4.169597008501025e-07, "loss": 0.8271, "step": 6462 }, { "epoch": 0.9106023247622402, "grad_norm": 1.0305355896183237, "learning_rate": 4.156566431974274e-07, "loss": 0.8561, "step": 6463 }, { "epoch": 0.910743219443466, "grad_norm": 1.0472577175642173, "learning_rate": 4.143555816359457e-07, "loss": 0.7978, "step": 6464 }, { "epoch": 0.9108841141246918, "grad_norm": 0.9873807386025535, "learning_rate": 4.130565164366207e-07, "loss": 0.8394, "step": 6465 }, { "epoch": 0.9110250088059175, "grad_norm": 1.0253884214105695, "learning_rate": 4.1175944787000933e-07, "loss": 0.7861, "step": 6466 }, { "epoch": 0.9111659034871433, "grad_norm": 1.0974915135786951, "learning_rate": 4.104643762062399e-07, "loss": 0.8055, "step": 6467 }, { "epoch": 0.9113067981683691, "grad_norm": 0.9465480732797444, "learning_rate": 4.0917130171503563e-07, "loss": 0.8449, "step": 6468 }, { "epoch": 0.9114476928495949, "grad_norm": 1.0312158629467658, "learning_rate": 4.0788022466569875e-07, "loss": 0.8596, "step": 6469 }, { "epoch": 0.9115885875308207, "grad_norm": 0.9289818070117167, "learning_rate": 4.065911453271165e-07, "loss": 0.844, "step": 6470 }, { "epoch": 0.9117294822120465, "grad_norm": 1.2152450800683776, "learning_rate": 4.053040639677597e-07, "loss": 0.4177, "step": 6471 }, { "epoch": 0.9118703768932723, "grad_norm": 1.1305778268106674, "learning_rate": 4.040189808556849e-07, "loss": 0.4787, "step": 6472 }, { "epoch": 0.912011271574498, "grad_norm": 1.1449667724604187, "learning_rate": 4.027358962585293e-07, "loss": 0.7937, "step": 6473 }, { "epoch": 0.9121521662557238, "grad_norm": 0.8808100773120494, "learning_rate": 4.014548104435145e-07, "loss": 0.8319, "step": 6474 }, { "epoch": 0.9122930609369496, "grad_norm": 0.9318349672366842, "learning_rate": 4.001757236774506e-07, "loss": 0.8486, "step": 6475 }, { "epoch": 0.9124339556181754, "grad_norm": 1.0941412737680611, "learning_rate": 3.9889863622672555e-07, "loss": 0.9105, "step": 6476 }, { "epoch": 0.9125748502994012, "grad_norm": 0.9639690362615342, "learning_rate": 3.976235483573132e-07, "loss": 0.8274, "step": 6477 }, { "epoch": 0.912715744980627, "grad_norm": 0.9974531405619129, "learning_rate": 3.9635046033476897e-07, "loss": 0.8411, "step": 6478 }, { "epoch": 0.9128566396618528, "grad_norm": 1.1026523280869795, "learning_rate": 3.950793724242363e-07, "loss": 0.8457, "step": 6479 }, { "epoch": 0.9129975343430785, "grad_norm": 0.9196583846748323, "learning_rate": 3.938102848904379e-07, "loss": 0.8291, "step": 6480 }, { "epoch": 0.9131384290243043, "grad_norm": 0.9922074619661413, "learning_rate": 3.9254319799768236e-07, "loss": 0.7635, "step": 6481 }, { "epoch": 0.9132793237055301, "grad_norm": 1.1809879039218596, "learning_rate": 3.9127811200985965e-07, "loss": 0.8141, "step": 6482 }, { "epoch": 0.9134202183867559, "grad_norm": 1.3315270686122997, "learning_rate": 3.900150271904446e-07, "loss": 0.6015, "step": 6483 }, { "epoch": 0.9135611130679817, "grad_norm": 0.9804562068323937, "learning_rate": 3.887539438024934e-07, "loss": 0.8023, "step": 6484 }, { "epoch": 0.9137020077492075, "grad_norm": 1.0110274848558694, "learning_rate": 3.8749486210864827e-07, "loss": 0.8796, "step": 6485 }, { "epoch": 0.9138429024304332, "grad_norm": 1.3164253001410384, "learning_rate": 3.862377823711316e-07, "loss": 0.4457, "step": 6486 }, { "epoch": 0.913983797111659, "grad_norm": 1.0195812273318747, "learning_rate": 3.8498270485175294e-07, "loss": 0.8089, "step": 6487 }, { "epoch": 0.9141246917928848, "grad_norm": 1.369486450491717, "learning_rate": 3.8372962981189753e-07, "loss": 0.5078, "step": 6488 }, { "epoch": 0.9142655864741106, "grad_norm": 1.2198776973322576, "learning_rate": 3.8247855751254115e-07, "loss": 0.5059, "step": 6489 }, { "epoch": 0.9144064811553364, "grad_norm": 1.264263183628327, "learning_rate": 3.8122948821423753e-07, "loss": 0.52, "step": 6490 }, { "epoch": 0.9145473758365622, "grad_norm": 1.0101497680327471, "learning_rate": 3.7998242217712755e-07, "loss": 0.8411, "step": 6491 }, { "epoch": 0.914688270517788, "grad_norm": 1.0148976220524104, "learning_rate": 3.7873735966093114e-07, "loss": 0.8706, "step": 6492 }, { "epoch": 0.9148291651990137, "grad_norm": 0.9114028122593176, "learning_rate": 3.774943009249521e-07, "loss": 0.7978, "step": 6493 }, { "epoch": 0.9149700598802395, "grad_norm": 1.0701594372513044, "learning_rate": 3.7625324622807545e-07, "loss": 0.8206, "step": 6494 }, { "epoch": 0.9151109545614653, "grad_norm": 1.1411325330790119, "learning_rate": 3.750141958287745e-07, "loss": 0.857, "step": 6495 }, { "epoch": 0.9152518492426911, "grad_norm": 0.9316598610886982, "learning_rate": 3.737771499850962e-07, "loss": 0.8033, "step": 6496 }, { "epoch": 0.9153927439239169, "grad_norm": 1.1140796533096056, "learning_rate": 3.7254210895468104e-07, "loss": 0.7605, "step": 6497 }, { "epoch": 0.9155336386051427, "grad_norm": 1.3577696497249545, "learning_rate": 3.7130907299473996e-07, "loss": 0.5447, "step": 6498 }, { "epoch": 0.9156745332863684, "grad_norm": 1.392343299447425, "learning_rate": 3.7007804236207533e-07, "loss": 0.5908, "step": 6499 }, { "epoch": 0.9158154279675942, "grad_norm": 1.3433516920098991, "learning_rate": 3.6884901731306767e-07, "loss": 0.5635, "step": 6500 }, { "epoch": 0.91595632264882, "grad_norm": 1.0051113170111663, "learning_rate": 3.676219981036822e-07, "loss": 0.9044, "step": 6501 }, { "epoch": 0.9160972173300458, "grad_norm": 1.0442168770079925, "learning_rate": 3.663969849894644e-07, "loss": 0.8091, "step": 6502 }, { "epoch": 0.9162381120112716, "grad_norm": 1.1300266066144362, "learning_rate": 3.6517397822554143e-07, "loss": 0.3974, "step": 6503 }, { "epoch": 0.9163790066924974, "grad_norm": 1.2826490896908127, "learning_rate": 3.639529780666251e-07, "loss": 0.4851, "step": 6504 }, { "epoch": 0.9165199013737232, "grad_norm": 0.8979369064066957, "learning_rate": 3.627339847670075e-07, "loss": 0.793, "step": 6505 }, { "epoch": 0.9166607960549489, "grad_norm": 0.9082087175456255, "learning_rate": 3.615169985805633e-07, "loss": 0.8005, "step": 6506 }, { "epoch": 0.9168016907361747, "grad_norm": 1.01292354669438, "learning_rate": 3.6030201976074987e-07, "loss": 0.8633, "step": 6507 }, { "epoch": 0.9169425854174005, "grad_norm": 1.0740898562681, "learning_rate": 3.590890485606058e-07, "loss": 0.8311, "step": 6508 }, { "epoch": 0.9170834800986263, "grad_norm": 1.2039239433715918, "learning_rate": 3.578780852327513e-07, "loss": 0.4685, "step": 6509 }, { "epoch": 0.9172243747798521, "grad_norm": 1.0793355686575314, "learning_rate": 3.5666913002938675e-07, "loss": 0.8424, "step": 6510 }, { "epoch": 0.9173652694610779, "grad_norm": 1.0022535188940342, "learning_rate": 3.5546218320229864e-07, "loss": 0.8759, "step": 6511 }, { "epoch": 0.9175061641423037, "grad_norm": 1.2764510024026512, "learning_rate": 3.5425724500285144e-07, "loss": 0.8944, "step": 6512 }, { "epoch": 0.9176470588235294, "grad_norm": 0.9774193236892714, "learning_rate": 3.5305431568199433e-07, "loss": 0.8401, "step": 6513 }, { "epoch": 0.9177879535047552, "grad_norm": 1.1472959180490354, "learning_rate": 3.5185339549025367e-07, "loss": 0.4339, "step": 6514 }, { "epoch": 0.917928848185981, "grad_norm": 0.9993593851083998, "learning_rate": 3.5065448467774155e-07, "loss": 0.808, "step": 6515 }, { "epoch": 0.9180697428672068, "grad_norm": 1.4210182408313905, "learning_rate": 3.4945758349415047e-07, "loss": 0.4928, "step": 6516 }, { "epoch": 0.9182106375484326, "grad_norm": 1.29916606086411, "learning_rate": 3.482626921887533e-07, "loss": 0.5579, "step": 6517 }, { "epoch": 0.9183515322296584, "grad_norm": 1.1354728198782076, "learning_rate": 3.470698110104076e-07, "loss": 0.8735, "step": 6518 }, { "epoch": 0.9184924269108841, "grad_norm": 1.0323657412647573, "learning_rate": 3.4587894020754466e-07, "loss": 0.7925, "step": 6519 }, { "epoch": 0.9186333215921099, "grad_norm": 1.0259660451308465, "learning_rate": 3.4469008002818716e-07, "loss": 0.8178, "step": 6520 }, { "epoch": 0.9187742162733357, "grad_norm": 1.2164181677928787, "learning_rate": 3.435032307199304e-07, "loss": 0.4488, "step": 6521 }, { "epoch": 0.9189151109545615, "grad_norm": 1.062370837883873, "learning_rate": 3.423183925299578e-07, "loss": 0.8615, "step": 6522 }, { "epoch": 0.9190560056357873, "grad_norm": 0.9376731004950142, "learning_rate": 3.411355657050286e-07, "loss": 0.8335, "step": 6523 }, { "epoch": 0.9191969003170131, "grad_norm": 1.0360901580336432, "learning_rate": 3.399547504914846e-07, "loss": 0.8307, "step": 6524 }, { "epoch": 0.9193377949982389, "grad_norm": 0.9876404508666494, "learning_rate": 3.387759471352503e-07, "loss": 0.8306, "step": 6525 }, { "epoch": 0.9194786896794646, "grad_norm": 0.9670675703406817, "learning_rate": 3.3759915588183145e-07, "loss": 0.8795, "step": 6526 }, { "epoch": 0.9196195843606904, "grad_norm": 0.997438948743112, "learning_rate": 3.364243769763098e-07, "loss": 0.79, "step": 6527 }, { "epoch": 0.9197604790419162, "grad_norm": 0.9889954573862906, "learning_rate": 3.352516106633574e-07, "loss": 0.7959, "step": 6528 }, { "epoch": 0.919901373723142, "grad_norm": 1.2331645099299398, "learning_rate": 3.3408085718721564e-07, "loss": 0.4627, "step": 6529 }, { "epoch": 0.9200422684043678, "grad_norm": 1.2538611919373293, "learning_rate": 3.3291211679171485e-07, "loss": 0.454, "step": 6530 }, { "epoch": 0.9201831630855936, "grad_norm": 1.09881917417204, "learning_rate": 3.317453897202638e-07, "loss": 0.8252, "step": 6531 }, { "epoch": 0.9203240577668192, "grad_norm": 1.0172886106365722, "learning_rate": 3.3058067621585253e-07, "loss": 0.8224, "step": 6532 }, { "epoch": 0.920464952448045, "grad_norm": 0.9661689766423147, "learning_rate": 3.2941797652104926e-07, "loss": 0.8515, "step": 6533 }, { "epoch": 0.9206058471292708, "grad_norm": 1.106284542559246, "learning_rate": 3.2825729087800684e-07, "loss": 0.8655, "step": 6534 }, { "epoch": 0.9207467418104966, "grad_norm": 1.3544768511091647, "learning_rate": 3.2709861952845314e-07, "loss": 0.6082, "step": 6535 }, { "epoch": 0.9208876364917224, "grad_norm": 1.2476950311210515, "learning_rate": 3.259419627137039e-07, "loss": 0.4881, "step": 6536 }, { "epoch": 0.9210285311729483, "grad_norm": 1.2819828774404158, "learning_rate": 3.2478732067464767e-07, "loss": 0.4748, "step": 6537 }, { "epoch": 0.921169425854174, "grad_norm": 1.3523485278366063, "learning_rate": 3.236346936517587e-07, "loss": 0.4626, "step": 6538 }, { "epoch": 0.9213103205353997, "grad_norm": 1.2529586989794892, "learning_rate": 3.224840818850905e-07, "loss": 0.5797, "step": 6539 }, { "epoch": 0.9214512152166255, "grad_norm": 0.9358966260463142, "learning_rate": 3.213354856142747e-07, "loss": 0.7614, "step": 6540 }, { "epoch": 0.9215921098978513, "grad_norm": 1.7204608883626553, "learning_rate": 3.201889050785245e-07, "loss": 0.5641, "step": 6541 }, { "epoch": 0.9217330045790771, "grad_norm": 1.0939874641411742, "learning_rate": 3.1904434051663435e-07, "loss": 0.8017, "step": 6542 }, { "epoch": 0.9218738992603029, "grad_norm": 1.2375942235913586, "learning_rate": 3.1790179216697915e-07, "loss": 0.385, "step": 6543 }, { "epoch": 0.9220147939415287, "grad_norm": 1.1658635899755254, "learning_rate": 3.1676126026750963e-07, "loss": 0.445, "step": 6544 }, { "epoch": 0.9221556886227545, "grad_norm": 1.0740662981308955, "learning_rate": 3.156227450557614e-07, "loss": 0.8625, "step": 6545 }, { "epoch": 0.9222965833039802, "grad_norm": 0.9510666889245336, "learning_rate": 3.144862467688481e-07, "loss": 0.8263, "step": 6546 }, { "epoch": 0.922437477985206, "grad_norm": 0.9480231767926531, "learning_rate": 3.133517656434626e-07, "loss": 0.8237, "step": 6547 }, { "epoch": 0.9225783726664318, "grad_norm": 1.0981049745203266, "learning_rate": 3.1221930191588146e-07, "loss": 0.7919, "step": 6548 }, { "epoch": 0.9227192673476576, "grad_norm": 1.029000676896877, "learning_rate": 3.1108885582195494e-07, "loss": 0.4198, "step": 6549 }, { "epoch": 0.9228601620288834, "grad_norm": 0.9933659359779698, "learning_rate": 3.0996042759711796e-07, "loss": 0.8313, "step": 6550 }, { "epoch": 0.9230010567101092, "grad_norm": 1.0198849375309194, "learning_rate": 3.0883401747638155e-07, "loss": 0.8074, "step": 6551 }, { "epoch": 0.9231419513913349, "grad_norm": 1.2469417186634661, "learning_rate": 3.0770962569434235e-07, "loss": 0.4402, "step": 6552 }, { "epoch": 0.9232828460725607, "grad_norm": 1.0535885883452107, "learning_rate": 3.0658725248516875e-07, "loss": 0.8179, "step": 6553 }, { "epoch": 0.9234237407537865, "grad_norm": 1.0261477913291057, "learning_rate": 3.0546689808261585e-07, "loss": 0.8439, "step": 6554 }, { "epoch": 0.9235646354350123, "grad_norm": 1.1227372479718343, "learning_rate": 3.043485627200127e-07, "loss": 0.868, "step": 6555 }, { "epoch": 0.9237055301162381, "grad_norm": 1.0731767671990389, "learning_rate": 3.0323224663027064e-07, "loss": 0.8582, "step": 6556 }, { "epoch": 0.9238464247974639, "grad_norm": 1.0451355173143717, "learning_rate": 3.0211795004588153e-07, "loss": 0.8185, "step": 6557 }, { "epoch": 0.9239873194786897, "grad_norm": 1.0021213796369834, "learning_rate": 3.0100567319891414e-07, "loss": 0.8214, "step": 6558 }, { "epoch": 0.9241282141599154, "grad_norm": 1.006035984748136, "learning_rate": 2.998954163210199e-07, "loss": 0.7976, "step": 6559 }, { "epoch": 0.9242691088411412, "grad_norm": 1.227013341781501, "learning_rate": 2.9878717964342383e-07, "loss": 0.5744, "step": 6560 }, { "epoch": 0.924410003522367, "grad_norm": 1.1321481272511573, "learning_rate": 2.9768096339693685e-07, "loss": 0.5474, "step": 6561 }, { "epoch": 0.9245508982035928, "grad_norm": 1.0268452547970404, "learning_rate": 2.9657676781194355e-07, "loss": 0.8355, "step": 6562 }, { "epoch": 0.9246917928848186, "grad_norm": 1.1030266945386356, "learning_rate": 2.954745931184122e-07, "loss": 0.4408, "step": 6563 }, { "epoch": 0.9248326875660444, "grad_norm": 0.9879952589960339, "learning_rate": 2.9437443954588697e-07, "loss": 0.8493, "step": 6564 }, { "epoch": 0.9249735822472702, "grad_norm": 0.9837325712965628, "learning_rate": 2.9327630732349235e-07, "loss": 0.8426, "step": 6565 }, { "epoch": 0.9251144769284959, "grad_norm": 1.049670835529132, "learning_rate": 2.92180196679932e-07, "loss": 0.8523, "step": 6566 }, { "epoch": 0.9252553716097217, "grad_norm": 1.3072943600526605, "learning_rate": 2.91086107843489e-07, "loss": 0.4597, "step": 6567 }, { "epoch": 0.9253962662909475, "grad_norm": 0.9181843945072302, "learning_rate": 2.899940410420221e-07, "loss": 0.8267, "step": 6568 }, { "epoch": 0.9255371609721733, "grad_norm": 1.069444462811886, "learning_rate": 2.88903996502975e-07, "loss": 0.8312, "step": 6569 }, { "epoch": 0.9256780556533991, "grad_norm": 1.189805903204176, "learning_rate": 2.8781597445336486e-07, "loss": 0.4203, "step": 6570 }, { "epoch": 0.9258189503346249, "grad_norm": 1.0607945844091167, "learning_rate": 2.867299751197894e-07, "loss": 0.8089, "step": 6571 }, { "epoch": 0.9259598450158506, "grad_norm": 1.0211492249522682, "learning_rate": 2.8564599872842547e-07, "loss": 0.834, "step": 6572 }, { "epoch": 0.9261007396970764, "grad_norm": 1.082892076405871, "learning_rate": 2.8456404550502805e-07, "loss": 0.7677, "step": 6573 }, { "epoch": 0.9262416343783022, "grad_norm": 1.1634712523230297, "learning_rate": 2.834841156749313e-07, "loss": 0.8784, "step": 6574 }, { "epoch": 0.926382529059528, "grad_norm": 1.0513221609963292, "learning_rate": 2.824062094630486e-07, "loss": 0.8439, "step": 6575 }, { "epoch": 0.9265234237407538, "grad_norm": 1.053924134928128, "learning_rate": 2.813303270938683e-07, "loss": 0.7864, "step": 6576 }, { "epoch": 0.9266643184219796, "grad_norm": 0.9570818493347726, "learning_rate": 2.802564687914633e-07, "loss": 0.8003, "step": 6577 }, { "epoch": 0.9268052131032054, "grad_norm": 1.0528452533428718, "learning_rate": 2.79184634779478e-07, "loss": 0.884, "step": 6578 }, { "epoch": 0.9269461077844311, "grad_norm": 0.9706915692445031, "learning_rate": 2.7811482528114276e-07, "loss": 0.8512, "step": 6579 }, { "epoch": 0.9270870024656569, "grad_norm": 1.2689472374769977, "learning_rate": 2.7704704051926044e-07, "loss": 0.4391, "step": 6580 }, { "epoch": 0.9272278971468827, "grad_norm": 1.2224064893221898, "learning_rate": 2.759812807162132e-07, "loss": 0.44, "step": 6581 }, { "epoch": 0.9273687918281085, "grad_norm": 0.9854274833496636, "learning_rate": 2.749175460939624e-07, "loss": 0.8629, "step": 6582 }, { "epoch": 0.9275096865093343, "grad_norm": 1.3572193595948063, "learning_rate": 2.738558368740496e-07, "loss": 0.8593, "step": 6583 }, { "epoch": 0.9276505811905601, "grad_norm": 1.1222390866814254, "learning_rate": 2.7279615327759136e-07, "loss": 0.7869, "step": 6584 }, { "epoch": 0.9277914758717858, "grad_norm": 0.9658662637501251, "learning_rate": 2.717384955252833e-07, "loss": 0.8064, "step": 6585 }, { "epoch": 0.9279323705530116, "grad_norm": 1.2033272110426987, "learning_rate": 2.7068286383739815e-07, "loss": 0.5363, "step": 6586 }, { "epoch": 0.9280732652342374, "grad_norm": 1.107256271721607, "learning_rate": 2.6962925843379116e-07, "loss": 0.9046, "step": 6587 }, { "epoch": 0.9282141599154632, "grad_norm": 0.9956583958884248, "learning_rate": 2.6857767953388903e-07, "loss": 0.8317, "step": 6588 }, { "epoch": 0.928355054596689, "grad_norm": 0.9861806303898879, "learning_rate": 2.675281273567032e-07, "loss": 0.8619, "step": 6589 }, { "epoch": 0.9284959492779148, "grad_norm": 1.2604659508118892, "learning_rate": 2.664806021208166e-07, "loss": 0.4012, "step": 6590 }, { "epoch": 0.9286368439591406, "grad_norm": 0.9131362128478699, "learning_rate": 2.6543510404439474e-07, "loss": 0.8081, "step": 6591 }, { "epoch": 0.9287777386403663, "grad_norm": 0.9149605441973956, "learning_rate": 2.643916333451768e-07, "loss": 0.8479, "step": 6592 }, { "epoch": 0.9289186333215921, "grad_norm": 1.210135072860935, "learning_rate": 2.6335019024048445e-07, "loss": 0.4606, "step": 6593 }, { "epoch": 0.9290595280028179, "grad_norm": 0.919430828038938, "learning_rate": 2.6231077494721426e-07, "loss": 0.8186, "step": 6594 }, { "epoch": 0.9292004226840437, "grad_norm": 0.9894760031288792, "learning_rate": 2.6127338768184073e-07, "loss": 0.8476, "step": 6595 }, { "epoch": 0.9293413173652695, "grad_norm": 1.2838260985477628, "learning_rate": 2.602380286604156e-07, "loss": 0.5045, "step": 6596 }, { "epoch": 0.9294822120464953, "grad_norm": 1.1513334956192771, "learning_rate": 2.592046980985674e-07, "loss": 0.3923, "step": 6597 }, { "epoch": 0.9296231067277211, "grad_norm": 1.0731778682250606, "learning_rate": 2.581733962115074e-07, "loss": 0.8796, "step": 6598 }, { "epoch": 0.9297640014089468, "grad_norm": 1.1351266134296771, "learning_rate": 2.571441232140171e-07, "loss": 0.5011, "step": 6599 }, { "epoch": 0.9299048960901726, "grad_norm": 0.9732334340750697, "learning_rate": 2.5611687932046046e-07, "loss": 0.8506, "step": 6600 }, { "epoch": 0.9300457907713984, "grad_norm": 0.9527049450508791, "learning_rate": 2.5509166474477764e-07, "loss": 0.7771, "step": 6601 }, { "epoch": 0.9301866854526242, "grad_norm": 1.4066870444205881, "learning_rate": 2.5406847970048554e-07, "loss": 0.4134, "step": 6602 }, { "epoch": 0.93032758013385, "grad_norm": 1.0978910726737108, "learning_rate": 2.530473244006759e-07, "loss": 0.8241, "step": 6603 }, { "epoch": 0.9304684748150758, "grad_norm": 1.2739044431946176, "learning_rate": 2.5202819905802535e-07, "loss": 0.5003, "step": 6604 }, { "epoch": 0.9306093694963015, "grad_norm": 1.0078219870690646, "learning_rate": 2.5101110388477954e-07, "loss": 0.8464, "step": 6605 }, { "epoch": 0.9307502641775273, "grad_norm": 0.9053337571418584, "learning_rate": 2.499960390927658e-07, "loss": 0.8449, "step": 6606 }, { "epoch": 0.9308911588587531, "grad_norm": 0.9526358098832701, "learning_rate": 2.489830048933861e-07, "loss": 0.8286, "step": 6607 }, { "epoch": 0.9310320535399789, "grad_norm": 1.0676337940133558, "learning_rate": 2.4797200149762277e-07, "loss": 0.8685, "step": 6608 }, { "epoch": 0.9311729482212047, "grad_norm": 1.0841170800820201, "learning_rate": 2.469630291160319e-07, "loss": 0.4155, "step": 6609 }, { "epoch": 0.9313138429024305, "grad_norm": 1.4219449326506497, "learning_rate": 2.459560879587497e-07, "loss": 0.5753, "step": 6610 }, { "epoch": 0.9314547375836563, "grad_norm": 1.2510127025339197, "learning_rate": 2.449511782354852e-07, "loss": 0.4858, "step": 6611 }, { "epoch": 0.931595632264882, "grad_norm": 1.2444468377255824, "learning_rate": 2.4394830015552873e-07, "loss": 0.4774, "step": 6612 }, { "epoch": 0.9317365269461078, "grad_norm": 0.9986225243900666, "learning_rate": 2.4294745392774324e-07, "loss": 0.8481, "step": 6613 }, { "epoch": 0.9318774216273336, "grad_norm": 0.9235289534186076, "learning_rate": 2.41948639760573e-07, "loss": 0.8537, "step": 6614 }, { "epoch": 0.9320183163085594, "grad_norm": 1.187147737304431, "learning_rate": 2.4095185786203624e-07, "loss": 0.5575, "step": 6615 }, { "epoch": 0.9321592109897852, "grad_norm": 1.0286021893646322, "learning_rate": 2.3995710843972787e-07, "loss": 0.8191, "step": 6616 }, { "epoch": 0.932300105671011, "grad_norm": 1.0711569114040433, "learning_rate": 2.389643917008211e-07, "loss": 0.8369, "step": 6617 }, { "epoch": 0.9324410003522366, "grad_norm": 1.1831618610051966, "learning_rate": 2.3797370785206496e-07, "loss": 0.8137, "step": 6618 }, { "epoch": 0.9325818950334624, "grad_norm": 0.9297691582665801, "learning_rate": 2.3698505709978336e-07, "loss": 0.8905, "step": 6619 }, { "epoch": 0.9327227897146882, "grad_norm": 1.002404396062948, "learning_rate": 2.3599843964988044e-07, "loss": 0.8501, "step": 6620 }, { "epoch": 0.932863684395914, "grad_norm": 0.9196911739051415, "learning_rate": 2.350138557078352e-07, "loss": 0.8496, "step": 6621 }, { "epoch": 0.9330045790771399, "grad_norm": 1.0209121978783402, "learning_rate": 2.3403130547870134e-07, "loss": 0.8126, "step": 6622 }, { "epoch": 0.9331454737583657, "grad_norm": 1.0799289445097755, "learning_rate": 2.3305078916710966e-07, "loss": 0.8269, "step": 6623 }, { "epoch": 0.9332863684395915, "grad_norm": 1.2233193826176352, "learning_rate": 2.3207230697727124e-07, "loss": 0.5269, "step": 6624 }, { "epoch": 0.9334272631208171, "grad_norm": 1.2495836688451172, "learning_rate": 2.310958591129675e-07, "loss": 0.5895, "step": 6625 }, { "epoch": 0.9335681578020429, "grad_norm": 1.0514174201068496, "learning_rate": 2.3012144577756134e-07, "loss": 0.8624, "step": 6626 }, { "epoch": 0.9337090524832687, "grad_norm": 1.3779957007504515, "learning_rate": 2.291490671739882e-07, "loss": 0.5187, "step": 6627 }, { "epoch": 0.9338499471644945, "grad_norm": 1.3684795420826985, "learning_rate": 2.2817872350476278e-07, "loss": 0.5361, "step": 6628 }, { "epoch": 0.9339908418457203, "grad_norm": 0.8960689401277367, "learning_rate": 2.2721041497197227e-07, "loss": 0.8487, "step": 6629 }, { "epoch": 0.9341317365269461, "grad_norm": 1.171677430618089, "learning_rate": 2.2624414177728538e-07, "loss": 0.8475, "step": 6630 }, { "epoch": 0.9342726312081719, "grad_norm": 1.1066766883237118, "learning_rate": 2.2527990412194113e-07, "loss": 0.8716, "step": 6631 }, { "epoch": 0.9344135258893976, "grad_norm": 1.0557863725813599, "learning_rate": 2.2431770220675885e-07, "loss": 0.8557, "step": 6632 }, { "epoch": 0.9345544205706234, "grad_norm": 1.1654403306173335, "learning_rate": 2.2335753623213054e-07, "loss": 0.5769, "step": 6633 }, { "epoch": 0.9346953152518492, "grad_norm": 1.2833013676098095, "learning_rate": 2.2239940639802726e-07, "loss": 0.4725, "step": 6634 }, { "epoch": 0.934836209933075, "grad_norm": 1.1025891586938794, "learning_rate": 2.21443312903995e-07, "loss": 0.8629, "step": 6635 }, { "epoch": 0.9349771046143008, "grad_norm": 1.183971625121482, "learning_rate": 2.204892559491556e-07, "loss": 0.4412, "step": 6636 }, { "epoch": 0.9351179992955266, "grad_norm": 1.0048967119923022, "learning_rate": 2.1953723573220564e-07, "loss": 0.8793, "step": 6637 }, { "epoch": 0.9352588939767523, "grad_norm": 1.2582568462282042, "learning_rate": 2.1858725245141764e-07, "loss": 0.8324, "step": 6638 }, { "epoch": 0.9353997886579781, "grad_norm": 1.2018177033702029, "learning_rate": 2.1763930630464226e-07, "loss": 0.4461, "step": 6639 }, { "epoch": 0.9355406833392039, "grad_norm": 0.9679568359494541, "learning_rate": 2.1669339748930262e-07, "loss": 0.8304, "step": 6640 }, { "epoch": 0.9356815780204297, "grad_norm": 1.2138169484719323, "learning_rate": 2.157495262024023e-07, "loss": 0.3832, "step": 6641 }, { "epoch": 0.9358224727016555, "grad_norm": 0.9493376550525904, "learning_rate": 2.1480769264051405e-07, "loss": 0.742, "step": 6642 }, { "epoch": 0.9359633673828813, "grad_norm": 0.9998542278878196, "learning_rate": 2.1386789699979093e-07, "loss": 0.8425, "step": 6643 }, { "epoch": 0.9361042620641071, "grad_norm": 1.0844401219562747, "learning_rate": 2.1293013947595974e-07, "loss": 0.836, "step": 6644 }, { "epoch": 0.9362451567453328, "grad_norm": 1.1001327129011034, "learning_rate": 2.1199442026432537e-07, "loss": 0.8413, "step": 6645 }, { "epoch": 0.9363860514265586, "grad_norm": 1.0219572040396683, "learning_rate": 2.1106073955976415e-07, "loss": 0.8605, "step": 6646 }, { "epoch": 0.9365269461077844, "grad_norm": 1.040324235979391, "learning_rate": 2.101290975567294e-07, "loss": 0.8089, "step": 6647 }, { "epoch": 0.9366678407890102, "grad_norm": 0.9847944425957442, "learning_rate": 2.0919949444925146e-07, "loss": 0.7863, "step": 6648 }, { "epoch": 0.936808735470236, "grad_norm": 1.2623346647607512, "learning_rate": 2.0827193043093551e-07, "loss": 0.4841, "step": 6649 }, { "epoch": 0.9369496301514618, "grad_norm": 0.9827932721561444, "learning_rate": 2.0734640569495924e-07, "loss": 0.8081, "step": 6650 }, { "epoch": 0.9370905248326876, "grad_norm": 0.9438962234226982, "learning_rate": 2.0642292043408064e-07, "loss": 0.8191, "step": 6651 }, { "epoch": 0.9372314195139133, "grad_norm": 1.0908725409514977, "learning_rate": 2.055014748406281e-07, "loss": 0.7672, "step": 6652 }, { "epoch": 0.9373723141951391, "grad_norm": 1.341665909946043, "learning_rate": 2.0458206910650813e-07, "loss": 0.5744, "step": 6653 }, { "epoch": 0.9375132088763649, "grad_norm": 1.0271086158903209, "learning_rate": 2.0366470342319976e-07, "loss": 0.8557, "step": 6654 }, { "epoch": 0.9376541035575907, "grad_norm": 0.9339857866681435, "learning_rate": 2.027493779817624e-07, "loss": 0.7657, "step": 6655 }, { "epoch": 0.9377949982388165, "grad_norm": 1.0307995958415002, "learning_rate": 2.0183609297282358e-07, "loss": 0.8739, "step": 6656 }, { "epoch": 0.9379358929200423, "grad_norm": 1.0707654541357723, "learning_rate": 2.0092484858659222e-07, "loss": 0.8783, "step": 6657 }, { "epoch": 0.938076787601268, "grad_norm": 1.0452218338695893, "learning_rate": 2.0001564501284654e-07, "loss": 0.8562, "step": 6658 }, { "epoch": 0.9382176822824938, "grad_norm": 1.0018303193370008, "learning_rate": 1.9910848244094506e-07, "loss": 0.7961, "step": 6659 }, { "epoch": 0.9383585769637196, "grad_norm": 1.0221774624823712, "learning_rate": 1.9820336105981662e-07, "loss": 0.857, "step": 6660 }, { "epoch": 0.9384994716449454, "grad_norm": 1.0725911367911172, "learning_rate": 1.9730028105796827e-07, "loss": 0.8257, "step": 6661 }, { "epoch": 0.9386403663261712, "grad_norm": 1.4137376386396556, "learning_rate": 1.9639924262348175e-07, "loss": 0.6288, "step": 6662 }, { "epoch": 0.938781261007397, "grad_norm": 1.174467152188828, "learning_rate": 1.955002459440114e-07, "loss": 0.4813, "step": 6663 }, { "epoch": 0.9389221556886228, "grad_norm": 1.4754647877836278, "learning_rate": 1.9460329120678635e-07, "loss": 0.5276, "step": 6664 }, { "epoch": 0.9390630503698485, "grad_norm": 1.11340856049723, "learning_rate": 1.937083785986127e-07, "loss": 0.8673, "step": 6665 }, { "epoch": 0.9392039450510743, "grad_norm": 0.993376646544403, "learning_rate": 1.9281550830587025e-07, "loss": 0.8242, "step": 6666 }, { "epoch": 0.9393448397323001, "grad_norm": 1.1346372574194035, "learning_rate": 1.9192468051451584e-07, "loss": 0.8258, "step": 6667 }, { "epoch": 0.9394857344135259, "grad_norm": 1.0737871677991768, "learning_rate": 1.9103589541007327e-07, "loss": 0.854, "step": 6668 }, { "epoch": 0.9396266290947517, "grad_norm": 1.051348766500945, "learning_rate": 1.9014915317765004e-07, "loss": 0.8546, "step": 6669 }, { "epoch": 0.9397675237759775, "grad_norm": 0.9078322633775224, "learning_rate": 1.8926445400192173e-07, "loss": 0.7865, "step": 6670 }, { "epoch": 0.9399084184572032, "grad_norm": 0.9157877158094203, "learning_rate": 1.883817980671432e-07, "loss": 0.8136, "step": 6671 }, { "epoch": 0.940049313138429, "grad_norm": 1.2159874071330425, "learning_rate": 1.875011855571407e-07, "loss": 0.3915, "step": 6672 }, { "epoch": 0.9401902078196548, "grad_norm": 0.938612357314385, "learning_rate": 1.8662261665531644e-07, "loss": 0.8322, "step": 6673 }, { "epoch": 0.9403311025008806, "grad_norm": 1.1137304401954913, "learning_rate": 1.8574609154464408e-07, "loss": 0.8751, "step": 6674 }, { "epoch": 0.9404719971821064, "grad_norm": 1.25474579441538, "learning_rate": 1.8487161040767532e-07, "loss": 0.504, "step": 6675 }, { "epoch": 0.9406128918633322, "grad_norm": 1.2583456227199519, "learning_rate": 1.839991734265345e-07, "loss": 0.5222, "step": 6676 }, { "epoch": 0.940753786544558, "grad_norm": 1.0486060502811376, "learning_rate": 1.8312878078292073e-07, "loss": 0.8271, "step": 6677 }, { "epoch": 0.9408946812257837, "grad_norm": 1.4276421548133622, "learning_rate": 1.82260432658109e-07, "loss": 0.7017, "step": 6678 }, { "epoch": 0.9410355759070095, "grad_norm": 1.0696569838621957, "learning_rate": 1.813941292329413e-07, "loss": 0.9035, "step": 6679 }, { "epoch": 0.9411764705882353, "grad_norm": 1.010781387725688, "learning_rate": 1.8052987068784334e-07, "loss": 0.8876, "step": 6680 }, { "epoch": 0.9413173652694611, "grad_norm": 1.022663283192386, "learning_rate": 1.7966765720280887e-07, "loss": 0.8043, "step": 6681 }, { "epoch": 0.9414582599506869, "grad_norm": 1.0802419606053866, "learning_rate": 1.7880748895740874e-07, "loss": 0.861, "step": 6682 }, { "epoch": 0.9415991546319127, "grad_norm": 1.5216411507959253, "learning_rate": 1.7794936613078629e-07, "loss": 0.6252, "step": 6683 }, { "epoch": 0.9417400493131385, "grad_norm": 0.9657585274234987, "learning_rate": 1.7709328890165855e-07, "loss": 0.7896, "step": 6684 }, { "epoch": 0.9418809439943642, "grad_norm": 1.0074383546893724, "learning_rate": 1.7623925744831627e-07, "loss": 0.8347, "step": 6685 }, { "epoch": 0.94202183867559, "grad_norm": 1.0423726881134696, "learning_rate": 1.7538727194862715e-07, "loss": 0.8118, "step": 6686 }, { "epoch": 0.9421627333568158, "grad_norm": 1.1049635429566946, "learning_rate": 1.7453733258002925e-07, "loss": 0.8533, "step": 6687 }, { "epoch": 0.9423036280380416, "grad_norm": 0.9079093356205487, "learning_rate": 1.7368943951953766e-07, "loss": 0.8286, "step": 6688 }, { "epoch": 0.9424445227192674, "grad_norm": 1.3419776927607059, "learning_rate": 1.7284359294373665e-07, "loss": 0.5817, "step": 6689 }, { "epoch": 0.9425854174004932, "grad_norm": 1.1938903782332757, "learning_rate": 1.7199979302878866e-07, "loss": 0.4396, "step": 6690 }, { "epoch": 0.9427263120817189, "grad_norm": 0.9530746280372432, "learning_rate": 1.7115803995042868e-07, "loss": 0.8519, "step": 6691 }, { "epoch": 0.9428672067629447, "grad_norm": 0.971199986759223, "learning_rate": 1.7031833388396535e-07, "loss": 0.8651, "step": 6692 }, { "epoch": 0.9430081014441705, "grad_norm": 1.160758082204575, "learning_rate": 1.694806750042799e-07, "loss": 0.8014, "step": 6693 }, { "epoch": 0.9431489961253963, "grad_norm": 1.0512829378797666, "learning_rate": 1.6864506348582832e-07, "loss": 0.8734, "step": 6694 }, { "epoch": 0.9432898908066221, "grad_norm": 1.190165353863878, "learning_rate": 1.678114995026392e-07, "loss": 0.4933, "step": 6695 }, { "epoch": 0.9434307854878479, "grad_norm": 1.0189553470342856, "learning_rate": 1.669799832283181e-07, "loss": 0.806, "step": 6696 }, { "epoch": 0.9435716801690737, "grad_norm": 0.9970823971728637, "learning_rate": 1.661505148360376e-07, "loss": 0.8263, "step": 6697 }, { "epoch": 0.9437125748502994, "grad_norm": 1.2210749636613885, "learning_rate": 1.6532309449855176e-07, "loss": 0.5049, "step": 6698 }, { "epoch": 0.9438534695315252, "grad_norm": 1.0812060052033137, "learning_rate": 1.6449772238817942e-07, "loss": 0.854, "step": 6699 }, { "epoch": 0.943994364212751, "grad_norm": 1.0554703432716743, "learning_rate": 1.6367439867682078e-07, "loss": 0.8988, "step": 6700 }, { "epoch": 0.9441352588939768, "grad_norm": 0.9467525322561694, "learning_rate": 1.628531235359454e-07, "loss": 0.7825, "step": 6701 }, { "epoch": 0.9442761535752026, "grad_norm": 1.220638955351562, "learning_rate": 1.6203389713659534e-07, "loss": 0.4297, "step": 6702 }, { "epoch": 0.9444170482564284, "grad_norm": 0.8802646470188973, "learning_rate": 1.612167196493897e-07, "loss": 0.8339, "step": 6703 }, { "epoch": 0.944557942937654, "grad_norm": 1.1035780401962563, "learning_rate": 1.6040159124451783e-07, "loss": 0.8245, "step": 6704 }, { "epoch": 0.9446988376188798, "grad_norm": 0.9930195503709552, "learning_rate": 1.5958851209174065e-07, "loss": 0.7805, "step": 6705 }, { "epoch": 0.9448397323001057, "grad_norm": 0.9257996186164121, "learning_rate": 1.5877748236039936e-07, "loss": 0.8318, "step": 6706 }, { "epoch": 0.9449806269813315, "grad_norm": 1.1143995528133337, "learning_rate": 1.5796850221939887e-07, "loss": 0.8163, "step": 6707 }, { "epoch": 0.9451215216625573, "grad_norm": 1.1206236728123067, "learning_rate": 1.5716157183722658e-07, "loss": 0.8178, "step": 6708 }, { "epoch": 0.945262416343783, "grad_norm": 1.254474708209947, "learning_rate": 1.5635669138193477e-07, "loss": 0.4493, "step": 6709 }, { "epoch": 0.9454033110250089, "grad_norm": 0.9793187626276667, "learning_rate": 1.55553861021156e-07, "loss": 0.802, "step": 6710 }, { "epoch": 0.9455442057062345, "grad_norm": 1.2426808739379862, "learning_rate": 1.5475308092208873e-07, "loss": 0.4623, "step": 6711 }, { "epoch": 0.9456851003874603, "grad_norm": 0.960703934779089, "learning_rate": 1.539543512515107e-07, "loss": 0.8987, "step": 6712 }, { "epoch": 0.9458259950686861, "grad_norm": 1.0263127558995229, "learning_rate": 1.5315767217576993e-07, "loss": 0.7824, "step": 6713 }, { "epoch": 0.9459668897499119, "grad_norm": 1.0407413401536358, "learning_rate": 1.5236304386078593e-07, "loss": 0.8268, "step": 6714 }, { "epoch": 0.9461077844311377, "grad_norm": 1.0357084051546048, "learning_rate": 1.5157046647205297e-07, "loss": 0.8549, "step": 6715 }, { "epoch": 0.9462486791123635, "grad_norm": 1.3393381529692123, "learning_rate": 1.5077994017464014e-07, "loss": 0.6158, "step": 6716 }, { "epoch": 0.9463895737935893, "grad_norm": 1.1159773006162, "learning_rate": 1.4999146513318352e-07, "loss": 0.7957, "step": 6717 }, { "epoch": 0.946530468474815, "grad_norm": 0.9606594279934014, "learning_rate": 1.492050415118973e-07, "loss": 0.8141, "step": 6718 }, { "epoch": 0.9466713631560408, "grad_norm": 1.3392791501067085, "learning_rate": 1.4842066947456712e-07, "loss": 0.5162, "step": 6719 }, { "epoch": 0.9468122578372666, "grad_norm": 1.424025471610414, "learning_rate": 1.4763834918455123e-07, "loss": 0.4471, "step": 6720 }, { "epoch": 0.9469531525184924, "grad_norm": 1.0491654709116185, "learning_rate": 1.4685808080477816e-07, "loss": 0.8448, "step": 6721 }, { "epoch": 0.9470940471997182, "grad_norm": 1.4049342051813298, "learning_rate": 1.4607986449775235e-07, "loss": 0.5419, "step": 6722 }, { "epoch": 0.947234941880944, "grad_norm": 0.9510453710270583, "learning_rate": 1.4530370042554976e-07, "loss": 0.7992, "step": 6723 }, { "epoch": 0.9473758365621697, "grad_norm": 1.2910940477556503, "learning_rate": 1.4452958874981882e-07, "loss": 0.437, "step": 6724 }, { "epoch": 0.9475167312433955, "grad_norm": 1.1549325899882157, "learning_rate": 1.437575296317806e-07, "loss": 0.8446, "step": 6725 }, { "epoch": 0.9476576259246213, "grad_norm": 1.0377277417163886, "learning_rate": 1.4298752323222753e-07, "loss": 0.8421, "step": 6726 }, { "epoch": 0.9477985206058471, "grad_norm": 1.2906380535912672, "learning_rate": 1.4221956971152696e-07, "loss": 0.4938, "step": 6727 }, { "epoch": 0.9479394152870729, "grad_norm": 0.9827978910632862, "learning_rate": 1.4145366922961645e-07, "loss": 0.8105, "step": 6728 }, { "epoch": 0.9480803099682987, "grad_norm": 1.0410576208191564, "learning_rate": 1.4068982194600844e-07, "loss": 0.8571, "step": 6729 }, { "epoch": 0.9482212046495245, "grad_norm": 1.408975321085076, "learning_rate": 1.3992802801978566e-07, "loss": 0.4743, "step": 6730 }, { "epoch": 0.9483620993307502, "grad_norm": 1.2049139436107246, "learning_rate": 1.3916828760960233e-07, "loss": 0.4666, "step": 6731 }, { "epoch": 0.948502994011976, "grad_norm": 1.0150783683627358, "learning_rate": 1.384106008736863e-07, "loss": 0.8153, "step": 6732 }, { "epoch": 0.9486438886932018, "grad_norm": 1.0544791879808237, "learning_rate": 1.3765496796984024e-07, "loss": 0.8363, "step": 6733 }, { "epoch": 0.9487847833744276, "grad_norm": 0.9189467326291318, "learning_rate": 1.3690138905543494e-07, "loss": 0.7718, "step": 6734 }, { "epoch": 0.9489256780556534, "grad_norm": 0.909733275177671, "learning_rate": 1.3614986428741596e-07, "loss": 0.8534, "step": 6735 }, { "epoch": 0.9490665727368792, "grad_norm": 1.2393476309934515, "learning_rate": 1.354003938222992e-07, "loss": 0.8477, "step": 6736 }, { "epoch": 0.949207467418105, "grad_norm": 1.1983023660091816, "learning_rate": 1.3465297781617427e-07, "loss": 0.4625, "step": 6737 }, { "epoch": 0.9493483620993307, "grad_norm": 1.0271442532579005, "learning_rate": 1.3390761642470218e-07, "loss": 0.8432, "step": 6738 }, { "epoch": 0.9494892567805565, "grad_norm": 0.967347609683602, "learning_rate": 1.3316430980311767e-07, "loss": 0.8547, "step": 6739 }, { "epoch": 0.9496301514617823, "grad_norm": 1.016009089897673, "learning_rate": 1.324230581062236e-07, "loss": 0.8494, "step": 6740 }, { "epoch": 0.9497710461430081, "grad_norm": 1.0610956696555394, "learning_rate": 1.3168386148839973e-07, "loss": 0.8375, "step": 6741 }, { "epoch": 0.9499119408242339, "grad_norm": 1.0662099314273477, "learning_rate": 1.30946720103593e-07, "loss": 0.8599, "step": 6742 }, { "epoch": 0.9500528355054597, "grad_norm": 1.1878052004330555, "learning_rate": 1.3021163410532722e-07, "loss": 0.4329, "step": 6743 }, { "epoch": 0.9501937301866854, "grad_norm": 1.1567426593260515, "learning_rate": 1.294786036466944e-07, "loss": 0.8139, "step": 6744 }, { "epoch": 0.9503346248679112, "grad_norm": 1.031336511536441, "learning_rate": 1.2874762888036018e-07, "loss": 0.8665, "step": 6745 }, { "epoch": 0.950475519549137, "grad_norm": 1.375920182571306, "learning_rate": 1.280187099585606e-07, "loss": 0.5029, "step": 6746 }, { "epoch": 0.9506164142303628, "grad_norm": 1.1125798319660691, "learning_rate": 1.2729184703310636e-07, "loss": 0.4575, "step": 6747 }, { "epoch": 0.9507573089115886, "grad_norm": 1.1856748078274009, "learning_rate": 1.2656704025537647e-07, "loss": 0.5144, "step": 6748 }, { "epoch": 0.9508982035928144, "grad_norm": 1.1052627427662858, "learning_rate": 1.2584428977632456e-07, "loss": 0.8099, "step": 6749 }, { "epoch": 0.9510390982740402, "grad_norm": 1.253844571437599, "learning_rate": 1.251235957464747e-07, "loss": 0.5414, "step": 6750 }, { "epoch": 0.9511799929552659, "grad_norm": 1.1135237489669005, "learning_rate": 1.2440495831592237e-07, "loss": 0.8285, "step": 6751 }, { "epoch": 0.9513208876364917, "grad_norm": 1.0086846844690012, "learning_rate": 1.236883776343345e-07, "loss": 0.8109, "step": 6752 }, { "epoch": 0.9514617823177175, "grad_norm": 0.9363638069813768, "learning_rate": 1.2297385385095174e-07, "loss": 0.827, "step": 6753 }, { "epoch": 0.9516026769989433, "grad_norm": 0.9944053207016258, "learning_rate": 1.2226138711458502e-07, "loss": 0.8794, "step": 6754 }, { "epoch": 0.9517435716801691, "grad_norm": 0.9128116411474053, "learning_rate": 1.2155097757361566e-07, "loss": 0.8056, "step": 6755 }, { "epoch": 0.9518844663613949, "grad_norm": 0.9958392729676012, "learning_rate": 1.2084262537599756e-07, "loss": 0.7985, "step": 6756 }, { "epoch": 0.9520253610426206, "grad_norm": 1.111933485938458, "learning_rate": 1.2013633066925822e-07, "loss": 0.4541, "step": 6757 }, { "epoch": 0.9521662557238464, "grad_norm": 1.0891802885488988, "learning_rate": 1.1943209360049223e-07, "loss": 0.9167, "step": 6758 }, { "epoch": 0.9523071504050722, "grad_norm": 1.015985166944442, "learning_rate": 1.1872991431637116e-07, "loss": 0.7984, "step": 6759 }, { "epoch": 0.952448045086298, "grad_norm": 0.935107598744509, "learning_rate": 1.1802979296313244e-07, "loss": 0.7837, "step": 6760 }, { "epoch": 0.9525889397675238, "grad_norm": 1.0499073085059683, "learning_rate": 1.1733172968658724e-07, "loss": 0.8446, "step": 6761 }, { "epoch": 0.9527298344487496, "grad_norm": 1.2085270322583295, "learning_rate": 1.1663572463212036e-07, "loss": 0.4313, "step": 6762 }, { "epoch": 0.9528707291299754, "grad_norm": 1.0890966393291852, "learning_rate": 1.1594177794468365e-07, "loss": 0.8464, "step": 6763 }, { "epoch": 0.9530116238112011, "grad_norm": 1.2740676639377984, "learning_rate": 1.152498897688037e-07, "loss": 0.6217, "step": 6764 }, { "epoch": 0.9531525184924269, "grad_norm": 1.0253229569475693, "learning_rate": 1.1456006024857747e-07, "loss": 0.8407, "step": 6765 }, { "epoch": 0.9532934131736527, "grad_norm": 0.9733914978883982, "learning_rate": 1.1387228952767338e-07, "loss": 0.8502, "step": 6766 }, { "epoch": 0.9534343078548785, "grad_norm": 1.0837456282152726, "learning_rate": 1.1318657774932796e-07, "loss": 0.9183, "step": 6767 }, { "epoch": 0.9535752025361043, "grad_norm": 0.9850367584033763, "learning_rate": 1.1250292505635364e-07, "loss": 0.8007, "step": 6768 }, { "epoch": 0.9537160972173301, "grad_norm": 1.1381689452578012, "learning_rate": 1.1182133159113096e-07, "loss": 0.8109, "step": 6769 }, { "epoch": 0.9538569918985559, "grad_norm": 0.9595554598330573, "learning_rate": 1.1114179749561304e-07, "loss": 0.829, "step": 6770 }, { "epoch": 0.9539978865797816, "grad_norm": 0.974222863547552, "learning_rate": 1.1046432291132447e-07, "loss": 0.7768, "step": 6771 }, { "epoch": 0.9541387812610074, "grad_norm": 1.0723940752038432, "learning_rate": 1.0978890797935904e-07, "loss": 0.8001, "step": 6772 }, { "epoch": 0.9542796759422332, "grad_norm": 1.2635230744388666, "learning_rate": 1.09115552840382e-07, "loss": 0.563, "step": 6773 }, { "epoch": 0.954420570623459, "grad_norm": 1.0201512466330414, "learning_rate": 1.0844425763463118e-07, "loss": 0.7895, "step": 6774 }, { "epoch": 0.9545614653046848, "grad_norm": 1.229344290517789, "learning_rate": 1.0777502250191474e-07, "loss": 0.5359, "step": 6775 }, { "epoch": 0.9547023599859106, "grad_norm": 0.9006270556494145, "learning_rate": 1.0710784758161119e-07, "loss": 0.8429, "step": 6776 }, { "epoch": 0.9548432546671363, "grad_norm": 1.0709577873288498, "learning_rate": 1.0644273301266827e-07, "loss": 0.7815, "step": 6777 }, { "epoch": 0.9549841493483621, "grad_norm": 1.032098670959155, "learning_rate": 1.0577967893360964e-07, "loss": 0.8211, "step": 6778 }, { "epoch": 0.9551250440295879, "grad_norm": 1.286275582931106, "learning_rate": 1.0511868548252591e-07, "loss": 0.4905, "step": 6779 }, { "epoch": 0.9552659387108137, "grad_norm": 0.9181165852352876, "learning_rate": 1.0445975279707809e-07, "loss": 0.8394, "step": 6780 }, { "epoch": 0.9554068333920395, "grad_norm": 0.9367002566449232, "learning_rate": 1.0380288101450087e-07, "loss": 0.866, "step": 6781 }, { "epoch": 0.9555477280732653, "grad_norm": 1.3828198348610072, "learning_rate": 1.0314807027159812e-07, "loss": 0.513, "step": 6782 }, { "epoch": 0.9556886227544911, "grad_norm": 0.9836860682133796, "learning_rate": 1.02495320704743e-07, "loss": 0.9057, "step": 6783 }, { "epoch": 0.9558295174357168, "grad_norm": 1.122796011934158, "learning_rate": 1.0184463244988229e-07, "loss": 0.8523, "step": 6784 }, { "epoch": 0.9559704121169426, "grad_norm": 1.1120108216236237, "learning_rate": 1.0119600564253207e-07, "loss": 0.8769, "step": 6785 }, { "epoch": 0.9561113067981684, "grad_norm": 1.3876543345593382, "learning_rate": 1.0054944041777981e-07, "loss": 0.4806, "step": 6786 }, { "epoch": 0.9562522014793942, "grad_norm": 1.0427947720306647, "learning_rate": 9.990493691028002e-08, "loss": 0.8381, "step": 6787 }, { "epoch": 0.95639309616062, "grad_norm": 1.180965067459619, "learning_rate": 9.926249525426423e-08, "loss": 0.458, "step": 6788 }, { "epoch": 0.9565339908418458, "grad_norm": 1.1310677493778267, "learning_rate": 9.862211558352874e-08, "loss": 0.8757, "step": 6789 }, { "epoch": 0.9566748855230714, "grad_norm": 0.9815640877191696, "learning_rate": 9.798379803144465e-08, "loss": 0.832, "step": 6790 }, { "epoch": 0.9568157802042973, "grad_norm": 0.966642945275247, "learning_rate": 9.734754273095005e-08, "loss": 0.8757, "step": 6791 }, { "epoch": 0.956956674885523, "grad_norm": 1.5450325626315629, "learning_rate": 9.67133498145556e-08, "loss": 0.6529, "step": 6792 }, { "epoch": 0.9570975695667489, "grad_norm": 0.9556965172000466, "learning_rate": 9.608121941434234e-08, "loss": 0.7492, "step": 6793 }, { "epoch": 0.9572384642479747, "grad_norm": 0.9582222442983919, "learning_rate": 9.545115166196161e-08, "loss": 0.7999, "step": 6794 }, { "epoch": 0.9573793589292005, "grad_norm": 1.1720493933116647, "learning_rate": 9.482314668863402e-08, "loss": 0.8563, "step": 6795 }, { "epoch": 0.9575202536104263, "grad_norm": 1.0929694981971347, "learning_rate": 9.419720462515381e-08, "loss": 0.3728, "step": 6796 }, { "epoch": 0.9576611482916519, "grad_norm": 1.2130885261711672, "learning_rate": 9.357332560188004e-08, "loss": 0.4577, "step": 6797 }, { "epoch": 0.9578020429728777, "grad_norm": 1.0664325056009996, "learning_rate": 9.295150974874767e-08, "loss": 0.7643, "step": 6798 }, { "epoch": 0.9579429376541035, "grad_norm": 1.0941909700135002, "learning_rate": 9.233175719525866e-08, "loss": 0.862, "step": 6799 }, { "epoch": 0.9580838323353293, "grad_norm": 1.0445350349604767, "learning_rate": 9.171406807048755e-08, "loss": 0.8437, "step": 6800 }, { "epoch": 0.9582247270165551, "grad_norm": 1.3431264012569704, "learning_rate": 9.109844250307587e-08, "loss": 0.5886, "step": 6801 }, { "epoch": 0.958365621697781, "grad_norm": 0.9069892558198951, "learning_rate": 9.048488062123995e-08, "loss": 0.8034, "step": 6802 }, { "epoch": 0.9585065163790067, "grad_norm": 1.3799020670301358, "learning_rate": 8.98733825527609e-08, "loss": 0.4902, "step": 6803 }, { "epoch": 0.9586474110602324, "grad_norm": 0.9667589204772815, "learning_rate": 8.926394842499574e-08, "loss": 0.8363, "step": 6804 }, { "epoch": 0.9587883057414582, "grad_norm": 1.0588265981321996, "learning_rate": 8.86565783648663e-08, "loss": 0.8075, "step": 6805 }, { "epoch": 0.958929200422684, "grad_norm": 1.0186938221956758, "learning_rate": 8.805127249886914e-08, "loss": 0.8067, "step": 6806 }, { "epoch": 0.9590700951039098, "grad_norm": 1.3998864876882842, "learning_rate": 8.744803095306786e-08, "loss": 0.5942, "step": 6807 }, { "epoch": 0.9592109897851356, "grad_norm": 1.094409740625248, "learning_rate": 8.684685385309533e-08, "loss": 0.8526, "step": 6808 }, { "epoch": 0.9593518844663614, "grad_norm": 0.9217824591828974, "learning_rate": 8.624774132415914e-08, "loss": 0.8689, "step": 6809 }, { "epoch": 0.9594927791475871, "grad_norm": 1.1892517043431081, "learning_rate": 8.565069349103172e-08, "loss": 0.8434, "step": 6810 }, { "epoch": 0.9596336738288129, "grad_norm": 1.1198824758617387, "learning_rate": 8.505571047805916e-08, "loss": 0.8411, "step": 6811 }, { "epoch": 0.9597745685100387, "grad_norm": 1.2302072271801283, "learning_rate": 8.446279240915567e-08, "loss": 0.5089, "step": 6812 }, { "epoch": 0.9599154631912645, "grad_norm": 1.0327774541606483, "learning_rate": 8.38719394078058e-08, "loss": 0.8276, "step": 6813 }, { "epoch": 0.9600563578724903, "grad_norm": 1.1016739265833124, "learning_rate": 8.328315159706334e-08, "loss": 0.8677, "step": 6814 }, { "epoch": 0.9601972525537161, "grad_norm": 1.0502123721233037, "learning_rate": 8.269642909955355e-08, "loss": 0.8391, "step": 6815 }, { "epoch": 0.9603381472349419, "grad_norm": 1.1547475908097493, "learning_rate": 8.211177203746978e-08, "loss": 0.8957, "step": 6816 }, { "epoch": 0.9604790419161676, "grad_norm": 1.2947946997316993, "learning_rate": 8.152918053257797e-08, "loss": 0.4579, "step": 6817 }, { "epoch": 0.9606199365973934, "grad_norm": 0.9741983819365867, "learning_rate": 8.094865470620994e-08, "loss": 0.8079, "step": 6818 }, { "epoch": 0.9607608312786192, "grad_norm": 1.1222252990622452, "learning_rate": 8.037019467927009e-08, "loss": 0.8072, "step": 6819 }, { "epoch": 0.960901725959845, "grad_norm": 1.03957385819521, "learning_rate": 7.979380057223207e-08, "loss": 0.8166, "step": 6820 }, { "epoch": 0.9610426206410708, "grad_norm": 1.0032675381154568, "learning_rate": 7.921947250513983e-08, "loss": 0.8327, "step": 6821 }, { "epoch": 0.9611835153222966, "grad_norm": 1.2270919955068558, "learning_rate": 7.86472105976055e-08, "loss": 0.45, "step": 6822 }, { "epoch": 0.9613244100035223, "grad_norm": 0.9904472286947639, "learning_rate": 7.807701496881149e-08, "loss": 0.8346, "step": 6823 }, { "epoch": 0.9614653046847481, "grad_norm": 1.053629758926234, "learning_rate": 7.750888573750947e-08, "loss": 0.8695, "step": 6824 }, { "epoch": 0.9616061993659739, "grad_norm": 1.0975357335889775, "learning_rate": 7.69428230220226e-08, "loss": 0.5175, "step": 6825 }, { "epoch": 0.9617470940471997, "grad_norm": 1.021169726384837, "learning_rate": 7.637882694024101e-08, "loss": 0.8618, "step": 6826 }, { "epoch": 0.9618879887284255, "grad_norm": 1.1643658045970169, "learning_rate": 7.581689760962852e-08, "loss": 0.8248, "step": 6827 }, { "epoch": 0.9620288834096513, "grad_norm": 1.139817517411235, "learning_rate": 7.525703514721261e-08, "loss": 0.8493, "step": 6828 }, { "epoch": 0.9621697780908771, "grad_norm": 0.9690856931531535, "learning_rate": 7.469923966959447e-08, "loss": 0.8377, "step": 6829 }, { "epoch": 0.9623106727721028, "grad_norm": 1.0687808396287815, "learning_rate": 7.414351129294561e-08, "loss": 0.8377, "step": 6830 }, { "epoch": 0.9624515674533286, "grad_norm": 1.015023518721549, "learning_rate": 7.358985013300236e-08, "loss": 0.8649, "step": 6831 }, { "epoch": 0.9625924621345544, "grad_norm": 1.0911745086872326, "learning_rate": 7.303825630507689e-08, "loss": 0.8368, "step": 6832 }, { "epoch": 0.9627333568157802, "grad_norm": 1.0441762193377655, "learning_rate": 7.248872992404398e-08, "loss": 0.8458, "step": 6833 }, { "epoch": 0.962874251497006, "grad_norm": 1.0831058913698373, "learning_rate": 7.194127110435323e-08, "loss": 0.8563, "step": 6834 }, { "epoch": 0.9630151461782318, "grad_norm": 1.228429096221124, "learning_rate": 7.139587996002006e-08, "loss": 0.5684, "step": 6835 }, { "epoch": 0.9631560408594576, "grad_norm": 1.1443205760787518, "learning_rate": 7.085255660463252e-08, "loss": 0.829, "step": 6836 }, { "epoch": 0.9632969355406833, "grad_norm": 1.042221730894473, "learning_rate": 7.031130115134566e-08, "loss": 0.8127, "step": 6837 }, { "epoch": 0.9634378302219091, "grad_norm": 0.9992315654425723, "learning_rate": 6.977211371288373e-08, "loss": 0.8373, "step": 6838 }, { "epoch": 0.9635787249031349, "grad_norm": 1.2781040108383976, "learning_rate": 6.923499440154247e-08, "loss": 0.5535, "step": 6839 }, { "epoch": 0.9637196195843607, "grad_norm": 1.1015557870821162, "learning_rate": 6.86999433291835e-08, "loss": 0.8414, "step": 6840 }, { "epoch": 0.9638605142655865, "grad_norm": 1.0169549381777452, "learning_rate": 6.816696060724215e-08, "loss": 0.7925, "step": 6841 }, { "epoch": 0.9640014089468123, "grad_norm": 1.071944293725943, "learning_rate": 6.76360463467185e-08, "loss": 0.8787, "step": 6842 }, { "epoch": 0.964142303628038, "grad_norm": 0.9779444212298763, "learning_rate": 6.710720065818521e-08, "loss": 0.8326, "step": 6843 }, { "epoch": 0.9642831983092638, "grad_norm": 1.2577545865899011, "learning_rate": 6.658042365178086e-08, "loss": 0.5967, "step": 6844 }, { "epoch": 0.9644240929904896, "grad_norm": 1.3100538630961869, "learning_rate": 6.605571543721767e-08, "loss": 0.5335, "step": 6845 }, { "epoch": 0.9645649876717154, "grad_norm": 1.272998757907739, "learning_rate": 6.553307612377268e-08, "loss": 0.5651, "step": 6846 }, { "epoch": 0.9647058823529412, "grad_norm": 1.1071076393317998, "learning_rate": 6.501250582029329e-08, "loss": 0.8584, "step": 6847 }, { "epoch": 0.964846777034167, "grad_norm": 1.0565296705023905, "learning_rate": 6.449400463519939e-08, "loss": 0.8373, "step": 6848 }, { "epoch": 0.9649876717153928, "grad_norm": 1.0059485504591013, "learning_rate": 6.397757267647353e-08, "loss": 0.8615, "step": 6849 }, { "epoch": 0.9651285663966185, "grad_norm": 1.2311244127375673, "learning_rate": 6.346321005167299e-08, "loss": 0.8266, "step": 6850 }, { "epoch": 0.9652694610778443, "grad_norm": 0.9923882687881475, "learning_rate": 6.295091686791987e-08, "loss": 0.8136, "step": 6851 }, { "epoch": 0.9654103557590701, "grad_norm": 1.076648831864813, "learning_rate": 6.244069323191104e-08, "loss": 0.8624, "step": 6852 }, { "epoch": 0.9655512504402959, "grad_norm": 1.2011267020550576, "learning_rate": 6.193253924990483e-08, "loss": 0.8771, "step": 6853 }, { "epoch": 0.9656921451215217, "grad_norm": 1.1396026841212221, "learning_rate": 6.142645502773548e-08, "loss": 0.8622, "step": 6854 }, { "epoch": 0.9658330398027475, "grad_norm": 1.2412959027620258, "learning_rate": 6.092244067079978e-08, "loss": 0.634, "step": 6855 }, { "epoch": 0.9659739344839733, "grad_norm": 1.0147713937604066, "learning_rate": 6.042049628406931e-08, "loss": 0.8709, "step": 6856 }, { "epoch": 0.966114829165199, "grad_norm": 1.1564289221937527, "learning_rate": 5.99206219720816e-08, "loss": 0.8591, "step": 6857 }, { "epoch": 0.9662557238464248, "grad_norm": 1.3462732333763405, "learning_rate": 5.9422817838942214e-08, "loss": 0.526, "step": 6858 }, { "epoch": 0.9663966185276506, "grad_norm": 1.1912448921566154, "learning_rate": 5.892708398832714e-08, "loss": 0.4407, "step": 6859 }, { "epoch": 0.9665375132088764, "grad_norm": 1.2797995652206713, "learning_rate": 5.8433420523481554e-08, "loss": 0.525, "step": 6860 }, { "epoch": 0.9666784078901022, "grad_norm": 1.1367357246829484, "learning_rate": 5.7941827547217666e-08, "loss": 0.5061, "step": 6861 }, { "epoch": 0.966819302571328, "grad_norm": 1.1254772616958133, "learning_rate": 5.745230516191802e-08, "loss": 0.8715, "step": 6862 }, { "epoch": 0.9669601972525537, "grad_norm": 1.0297179434562589, "learning_rate": 5.696485346953329e-08, "loss": 0.8417, "step": 6863 }, { "epoch": 0.9671010919337795, "grad_norm": 0.9192765837687376, "learning_rate": 5.647947257158337e-08, "loss": 0.8182, "step": 6864 }, { "epoch": 0.9672419866150053, "grad_norm": 1.0241126082828418, "learning_rate": 5.59961625691563e-08, "loss": 0.8132, "step": 6865 }, { "epoch": 0.9673828812962311, "grad_norm": 1.2075128359622567, "learning_rate": 5.5514923562908215e-08, "loss": 0.7304, "step": 6866 }, { "epoch": 0.9675237759774569, "grad_norm": 1.1551635637368658, "learning_rate": 5.503575565306674e-08, "loss": 0.4747, "step": 6867 }, { "epoch": 0.9676646706586827, "grad_norm": 1.1613207224792563, "learning_rate": 5.4558658939424246e-08, "loss": 0.4439, "step": 6868 }, { "epoch": 0.9678055653399085, "grad_norm": 1.2218438021429083, "learning_rate": 5.4083633521344605e-08, "loss": 0.4952, "step": 6869 }, { "epoch": 0.9679464600211342, "grad_norm": 0.9062673015067745, "learning_rate": 5.361067949775867e-08, "loss": 0.79, "step": 6870 }, { "epoch": 0.96808735470236, "grad_norm": 0.9761774662467883, "learning_rate": 5.313979696716654e-08, "loss": 0.8239, "step": 6871 }, { "epoch": 0.9682282493835858, "grad_norm": 0.9669179624631339, "learning_rate": 5.267098602763865e-08, "loss": 0.8621, "step": 6872 }, { "epoch": 0.9683691440648116, "grad_norm": 0.9292410939550523, "learning_rate": 5.2204246776810244e-08, "loss": 0.8426, "step": 6873 }, { "epoch": 0.9685100387460374, "grad_norm": 1.2311687704583445, "learning_rate": 5.173957931188911e-08, "loss": 0.4924, "step": 6874 }, { "epoch": 0.9686509334272632, "grad_norm": 1.3064249160396404, "learning_rate": 5.127698372964785e-08, "loss": 0.4307, "step": 6875 }, { "epoch": 0.9687918281084889, "grad_norm": 1.2162228229357577, "learning_rate": 5.081646012642938e-08, "loss": 0.5183, "step": 6876 }, { "epoch": 0.9689327227897147, "grad_norm": 1.0744059967998125, "learning_rate": 5.035800859814588e-08, "loss": 0.8762, "step": 6877 }, { "epoch": 0.9690736174709405, "grad_norm": 1.037646197252639, "learning_rate": 4.990162924027764e-08, "loss": 0.8094, "step": 6878 }, { "epoch": 0.9692145121521663, "grad_norm": 1.388662879821851, "learning_rate": 4.944732214787196e-08, "loss": 0.4962, "step": 6879 }, { "epoch": 0.969355406833392, "grad_norm": 0.9986301639501008, "learning_rate": 4.899508741554537e-08, "loss": 0.7938, "step": 6880 }, { "epoch": 0.9694963015146179, "grad_norm": 1.3821761766651957, "learning_rate": 4.854492513748365e-08, "loss": 0.4774, "step": 6881 }, { "epoch": 0.9696371961958437, "grad_norm": 1.2218956549016269, "learning_rate": 4.809683540743959e-08, "loss": 0.4589, "step": 6882 }, { "epoch": 0.9697780908770693, "grad_norm": 1.143562480878125, "learning_rate": 4.765081831873519e-08, "loss": 0.7827, "step": 6883 }, { "epoch": 0.9699189855582951, "grad_norm": 0.9157322214035108, "learning_rate": 4.72068739642606e-08, "loss": 0.7494, "step": 6884 }, { "epoch": 0.9700598802395209, "grad_norm": 1.0426368017237444, "learning_rate": 4.676500243647297e-08, "loss": 0.8306, "step": 6885 }, { "epoch": 0.9702007749207467, "grad_norm": 1.0013884370812818, "learning_rate": 4.632520382740091e-08, "loss": 0.8387, "step": 6886 }, { "epoch": 0.9703416696019725, "grad_norm": 0.9932471868167492, "learning_rate": 4.588747822863893e-08, "loss": 0.8298, "step": 6887 }, { "epoch": 0.9704825642831983, "grad_norm": 1.0350028984026671, "learning_rate": 4.545182573135076e-08, "loss": 0.8285, "step": 6888 }, { "epoch": 0.9706234589644241, "grad_norm": 1.3034861162576838, "learning_rate": 4.501824642626718e-08, "loss": 0.525, "step": 6889 }, { "epoch": 0.9707643536456498, "grad_norm": 1.2590573956986635, "learning_rate": 4.458674040368816e-08, "loss": 0.4574, "step": 6890 }, { "epoch": 0.9709052483268756, "grad_norm": 1.0879595983357753, "learning_rate": 4.4157307753480704e-08, "loss": 0.4864, "step": 6891 }, { "epoch": 0.9710461430081014, "grad_norm": 0.8966724797460466, "learning_rate": 4.372994856508217e-08, "loss": 0.8622, "step": 6892 }, { "epoch": 0.9711870376893272, "grad_norm": 1.2645742631453298, "learning_rate": 4.330466292749691e-08, "loss": 0.5163, "step": 6893 }, { "epoch": 0.971327932370553, "grad_norm": 1.0996516606143831, "learning_rate": 4.2881450929297406e-08, "loss": 0.8246, "step": 6894 }, { "epoch": 0.9714688270517788, "grad_norm": 1.0493329871719739, "learning_rate": 4.246031265862316e-08, "loss": 0.7855, "step": 6895 }, { "epoch": 0.9716097217330045, "grad_norm": 1.36673506326172, "learning_rate": 4.2041248203184006e-08, "loss": 0.5421, "step": 6896 }, { "epoch": 0.9717506164142303, "grad_norm": 0.9893624523806768, "learning_rate": 4.162425765025679e-08, "loss": 0.8468, "step": 6897 }, { "epoch": 0.9718915110954561, "grad_norm": 1.0478081694502184, "learning_rate": 4.120934108668429e-08, "loss": 0.3828, "step": 6898 }, { "epoch": 0.9720324057766819, "grad_norm": 1.1825893180742129, "learning_rate": 4.07964985988829e-08, "loss": 0.8219, "step": 6899 }, { "epoch": 0.9721733004579077, "grad_norm": 1.1420853458496607, "learning_rate": 4.0385730272830545e-08, "loss": 0.8518, "step": 6900 }, { "epoch": 0.9723141951391335, "grad_norm": 1.0656753392152751, "learning_rate": 3.997703619407878e-08, "loss": 0.8078, "step": 6901 }, { "epoch": 0.9724550898203593, "grad_norm": 0.9499036352605325, "learning_rate": 3.957041644774173e-08, "loss": 0.8458, "step": 6902 }, { "epoch": 0.972595984501585, "grad_norm": 1.4178153813271621, "learning_rate": 3.9165871118507225e-08, "loss": 0.5468, "step": 6903 }, { "epoch": 0.9727368791828108, "grad_norm": 1.1900503046165425, "learning_rate": 3.8763400290626755e-08, "loss": 0.4586, "step": 6904 }, { "epoch": 0.9728777738640366, "grad_norm": 1.0428213436673976, "learning_rate": 3.8363004047922145e-08, "loss": 0.7853, "step": 6905 }, { "epoch": 0.9730186685452624, "grad_norm": 0.9360214051078829, "learning_rate": 3.7964682473780045e-08, "loss": 0.8181, "step": 6906 }, { "epoch": 0.9731595632264882, "grad_norm": 0.9911241473229164, "learning_rate": 3.756843565115964e-08, "loss": 0.8354, "step": 6907 }, { "epoch": 0.973300457907714, "grad_norm": 1.0128872324439453, "learning_rate": 3.717426366258492e-08, "loss": 0.8691, "step": 6908 }, { "epoch": 0.9734413525889397, "grad_norm": 1.0075415738260076, "learning_rate": 3.6782166590147996e-08, "loss": 0.8041, "step": 6909 }, { "epoch": 0.9735822472701655, "grad_norm": 1.3181854008788567, "learning_rate": 3.63921445155091e-08, "loss": 0.4678, "step": 6910 }, { "epoch": 0.9737231419513913, "grad_norm": 1.0258691764553274, "learning_rate": 3.60041975198977e-08, "loss": 0.7935, "step": 6911 }, { "epoch": 0.9738640366326171, "grad_norm": 1.1444011183377563, "learning_rate": 3.561832568410917e-08, "loss": 0.4778, "step": 6912 }, { "epoch": 0.9740049313138429, "grad_norm": 1.0133771356022392, "learning_rate": 3.5234529088507e-08, "loss": 0.8444, "step": 6913 }, { "epoch": 0.9741458259950687, "grad_norm": 1.0029056025308807, "learning_rate": 3.4852807813022803e-08, "loss": 0.8168, "step": 6914 }, { "epoch": 0.9742867206762945, "grad_norm": 1.1648195736614861, "learning_rate": 3.447316193715744e-08, "loss": 0.4795, "step": 6915 }, { "epoch": 0.9744276153575202, "grad_norm": 1.0285578776070634, "learning_rate": 3.4095591539976544e-08, "loss": 0.8253, "step": 6916 }, { "epoch": 0.974568510038746, "grad_norm": 1.5494876238983382, "learning_rate": 3.3720096700117224e-08, "loss": 0.5328, "step": 6917 }, { "epoch": 0.9747094047199718, "grad_norm": 1.0359539765861205, "learning_rate": 3.334667749577913e-08, "loss": 0.8597, "step": 6918 }, { "epoch": 0.9748502994011976, "grad_norm": 1.0210753900717422, "learning_rate": 3.2975334004735625e-08, "loss": 0.8203, "step": 6919 }, { "epoch": 0.9749911940824234, "grad_norm": 1.1662424108649265, "learning_rate": 3.2606066304323727e-08, "loss": 0.8376, "step": 6920 }, { "epoch": 0.9751320887636492, "grad_norm": 1.07289179209655, "learning_rate": 3.2238874471449686e-08, "loss": 0.8709, "step": 6921 }, { "epoch": 0.975272983444875, "grad_norm": 1.046367980156584, "learning_rate": 3.187375858258568e-08, "loss": 0.8821, "step": 6922 }, { "epoch": 0.9754138781261007, "grad_norm": 1.0292000649506952, "learning_rate": 3.15107187137742e-08, "loss": 0.8172, "step": 6923 }, { "epoch": 0.9755547728073265, "grad_norm": 1.0753774095908122, "learning_rate": 3.114975494062367e-08, "loss": 0.8786, "step": 6924 }, { "epoch": 0.9756956674885523, "grad_norm": 0.9967902591723306, "learning_rate": 3.0790867338310605e-08, "loss": 0.8165, "step": 6925 }, { "epoch": 0.9758365621697781, "grad_norm": 0.9530831988214359, "learning_rate": 3.043405598157856e-08, "loss": 0.8573, "step": 6926 }, { "epoch": 0.9759774568510039, "grad_norm": 0.9560043295051127, "learning_rate": 3.00793209447392e-08, "loss": 0.8255, "step": 6927 }, { "epoch": 0.9761183515322297, "grad_norm": 1.145263529683114, "learning_rate": 2.9726662301672316e-08, "loss": 0.8431, "step": 6928 }, { "epoch": 0.9762592462134554, "grad_norm": 1.0120599986214833, "learning_rate": 2.9376080125824713e-08, "loss": 0.7524, "step": 6929 }, { "epoch": 0.9764001408946812, "grad_norm": 1.155142084135343, "learning_rate": 2.902757449021021e-08, "loss": 0.9096, "step": 6930 }, { "epoch": 0.976541035575907, "grad_norm": 0.9414554913045425, "learning_rate": 2.8681145467410744e-08, "loss": 0.8065, "step": 6931 }, { "epoch": 0.9766819302571328, "grad_norm": 0.9629745088004603, "learning_rate": 2.833679312957527e-08, "loss": 0.8482, "step": 6932 }, { "epoch": 0.9768228249383586, "grad_norm": 1.2894401482306903, "learning_rate": 2.7994517548420864e-08, "loss": 0.4555, "step": 6933 }, { "epoch": 0.9769637196195844, "grad_norm": 1.2743771054777497, "learning_rate": 2.7654318795232726e-08, "loss": 0.5319, "step": 6934 }, { "epoch": 0.9771046143008102, "grad_norm": 1.0554265904240707, "learning_rate": 2.7316196940860852e-08, "loss": 0.8427, "step": 6935 }, { "epoch": 0.9772455089820359, "grad_norm": 0.9576417947765948, "learning_rate": 2.6980152055726684e-08, "loss": 0.8151, "step": 6936 }, { "epoch": 0.9773864036632617, "grad_norm": 0.9752433589847738, "learning_rate": 2.664618420981424e-08, "loss": 0.7945, "step": 6937 }, { "epoch": 0.9775272983444875, "grad_norm": 1.3169946255942504, "learning_rate": 2.631429347268011e-08, "loss": 0.4679, "step": 6938 }, { "epoch": 0.9776681930257133, "grad_norm": 0.9745728534614566, "learning_rate": 2.5984479913443437e-08, "loss": 0.8302, "step": 6939 }, { "epoch": 0.9778090877069391, "grad_norm": 1.277288070834984, "learning_rate": 2.5656743600795953e-08, "loss": 0.4927, "step": 6940 }, { "epoch": 0.9779499823881649, "grad_norm": 1.3807049556413333, "learning_rate": 2.533108460299083e-08, "loss": 0.4883, "step": 6941 }, { "epoch": 0.9780908770693907, "grad_norm": 1.2221480499068444, "learning_rate": 2.5007502987854926e-08, "loss": 0.4166, "step": 6942 }, { "epoch": 0.9782317717506164, "grad_norm": 1.2813025341115822, "learning_rate": 2.4685998822776557e-08, "loss": 0.53, "step": 6943 }, { "epoch": 0.9783726664318422, "grad_norm": 1.0497475448279505, "learning_rate": 2.4366572174714387e-08, "loss": 0.8075, "step": 6944 }, { "epoch": 0.978513561113068, "grad_norm": 1.104018722727954, "learning_rate": 2.4049223110196307e-08, "loss": 0.7973, "step": 6945 }, { "epoch": 0.9786544557942938, "grad_norm": 0.966424444531974, "learning_rate": 2.3733951695312783e-08, "loss": 0.818, "step": 6946 }, { "epoch": 0.9787953504755196, "grad_norm": 1.1472619377796154, "learning_rate": 2.3420757995724632e-08, "loss": 0.8664, "step": 6947 }, { "epoch": 0.9789362451567454, "grad_norm": 1.0564910863393766, "learning_rate": 2.3109642076660778e-08, "loss": 0.8361, "step": 6948 }, { "epoch": 0.9790771398379711, "grad_norm": 1.1021360606481791, "learning_rate": 2.280060400291384e-08, "loss": 0.8489, "step": 6949 }, { "epoch": 0.9792180345191969, "grad_norm": 1.1820296022722163, "learning_rate": 2.249364383884789e-08, "loss": 0.5592, "step": 6950 }, { "epoch": 0.9793589292004227, "grad_norm": 1.1032746811295275, "learning_rate": 2.218876164839068e-08, "loss": 0.8285, "step": 6951 }, { "epoch": 0.9794998238816485, "grad_norm": 0.9571172606976173, "learning_rate": 2.18859574950403e-08, "loss": 0.8365, "step": 6952 }, { "epoch": 0.9796407185628743, "grad_norm": 1.0474356069955832, "learning_rate": 2.1585231441859646e-08, "loss": 0.792, "step": 6953 }, { "epoch": 0.9797816132441001, "grad_norm": 1.010249649365674, "learning_rate": 2.1286583551479723e-08, "loss": 0.838, "step": 6954 }, { "epoch": 0.9799225079253259, "grad_norm": 1.2675780808556123, "learning_rate": 2.099001388609856e-08, "loss": 0.5443, "step": 6955 }, { "epoch": 0.9800634026065516, "grad_norm": 1.1695788256394553, "learning_rate": 2.069552250748119e-08, "loss": 0.4814, "step": 6956 }, { "epoch": 0.9802042972877774, "grad_norm": 0.9413831930077021, "learning_rate": 2.040310947696078e-08, "loss": 0.8142, "step": 6957 }, { "epoch": 0.9803451919690032, "grad_norm": 1.1087880054061765, "learning_rate": 2.01127748554375e-08, "loss": 0.803, "step": 6958 }, { "epoch": 0.980486086650229, "grad_norm": 1.0460870235923356, "learning_rate": 1.9824518703376317e-08, "loss": 0.8055, "step": 6959 }, { "epoch": 0.9806269813314548, "grad_norm": 0.9236168394598953, "learning_rate": 1.953834108081254e-08, "loss": 0.7845, "step": 6960 }, { "epoch": 0.9807678760126806, "grad_norm": 1.0098013249340865, "learning_rate": 1.9254242047346272e-08, "loss": 0.8382, "step": 6961 }, { "epoch": 0.9809087706939063, "grad_norm": 1.0599522158688846, "learning_rate": 1.897222166214685e-08, "loss": 0.8469, "step": 6962 }, { "epoch": 0.981049665375132, "grad_norm": 1.0837769972347455, "learning_rate": 1.8692279983948404e-08, "loss": 0.846, "step": 6963 }, { "epoch": 0.9811905600563579, "grad_norm": 1.0098524130543105, "learning_rate": 1.841441707105429e-08, "loss": 0.8277, "step": 6964 }, { "epoch": 0.9813314547375837, "grad_norm": 1.0787586430190392, "learning_rate": 1.813863298133267e-08, "loss": 0.7836, "step": 6965 }, { "epoch": 0.9814723494188095, "grad_norm": 1.3597352454629361, "learning_rate": 1.786492777222093e-08, "loss": 0.5705, "step": 6966 }, { "epoch": 0.9816132441000353, "grad_norm": 1.0696592761240926, "learning_rate": 1.7593301500721248e-08, "loss": 0.8332, "step": 6967 }, { "epoch": 0.981754138781261, "grad_norm": 1.0043326072648366, "learning_rate": 1.7323754223405044e-08, "loss": 0.8339, "step": 6968 }, { "epoch": 0.9818950334624867, "grad_norm": 0.9418144327547464, "learning_rate": 1.705628599640963e-08, "loss": 0.8576, "step": 6969 }, { "epoch": 0.9820359281437125, "grad_norm": 0.9740975840821298, "learning_rate": 1.6790896875440445e-08, "loss": 0.7953, "step": 6970 }, { "epoch": 0.9821768228249383, "grad_norm": 1.2230304779602053, "learning_rate": 1.6527586915767723e-08, "loss": 0.883, "step": 6971 }, { "epoch": 0.9823177175061641, "grad_norm": 1.4008056186061733, "learning_rate": 1.6266356172229826e-08, "loss": 0.5264, "step": 6972 }, { "epoch": 0.98245861218739, "grad_norm": 1.039307928207482, "learning_rate": 1.6007204699232115e-08, "loss": 0.7318, "step": 6973 }, { "epoch": 0.9825995068686157, "grad_norm": 1.089351555301736, "learning_rate": 1.5750132550748086e-08, "loss": 0.8976, "step": 6974 }, { "epoch": 0.9827404015498415, "grad_norm": 1.1842120675766543, "learning_rate": 1.5495139780317137e-08, "loss": 0.5033, "step": 6975 }, { "epoch": 0.9828812962310672, "grad_norm": 0.9533278463751899, "learning_rate": 1.524222644104456e-08, "loss": 0.8731, "step": 6976 }, { "epoch": 0.983022190912293, "grad_norm": 1.2322224451468304, "learning_rate": 1.4991392585603783e-08, "loss": 0.5088, "step": 6977 }, { "epoch": 0.9831630855935188, "grad_norm": 1.0506733928035255, "learning_rate": 1.4742638266234122e-08, "loss": 0.8331, "step": 6978 }, { "epoch": 0.9833039802747446, "grad_norm": 0.9268032348643722, "learning_rate": 1.449596353474414e-08, "loss": 0.8161, "step": 6979 }, { "epoch": 0.9834448749559704, "grad_norm": 0.9479284620839689, "learning_rate": 1.4251368442507185e-08, "loss": 0.7722, "step": 6980 }, { "epoch": 0.9835857696371962, "grad_norm": 0.9585004475582487, "learning_rate": 1.4008853040464732e-08, "loss": 0.8515, "step": 6981 }, { "epoch": 0.9837266643184219, "grad_norm": 1.0328481265434815, "learning_rate": 1.3768417379123045e-08, "loss": 0.889, "step": 6982 }, { "epoch": 0.9838675589996477, "grad_norm": 0.9259224143671491, "learning_rate": 1.3530061508557624e-08, "loss": 0.858, "step": 6983 }, { "epoch": 0.9840084536808735, "grad_norm": 1.078002533504364, "learning_rate": 1.329378547840876e-08, "loss": 0.8322, "step": 6984 }, { "epoch": 0.9841493483620993, "grad_norm": 1.3674040396148774, "learning_rate": 1.3059589337885981e-08, "loss": 0.4885, "step": 6985 }, { "epoch": 0.9842902430433251, "grad_norm": 0.9702189439282287, "learning_rate": 1.2827473135764712e-08, "loss": 0.814, "step": 6986 }, { "epoch": 0.9844311377245509, "grad_norm": 0.9726995463104121, "learning_rate": 1.2597436920385175e-08, "loss": 0.7933, "step": 6987 }, { "epoch": 0.9845720324057767, "grad_norm": 0.9952999681681972, "learning_rate": 1.2369480739656824e-08, "loss": 0.7971, "step": 6988 }, { "epoch": 0.9847129270870024, "grad_norm": 0.9764107025946379, "learning_rate": 1.2143604641055018e-08, "loss": 0.7997, "step": 6989 }, { "epoch": 0.9848538217682282, "grad_norm": 1.1356191997308143, "learning_rate": 1.1919808671622124e-08, "loss": 0.8259, "step": 6990 }, { "epoch": 0.984994716449454, "grad_norm": 1.0156144505431184, "learning_rate": 1.1698092877967526e-08, "loss": 0.8567, "step": 6991 }, { "epoch": 0.9851356111306798, "grad_norm": 1.0270295616919003, "learning_rate": 1.147845730626651e-08, "loss": 0.8542, "step": 6992 }, { "epoch": 0.9852765058119056, "grad_norm": 1.0659004628046451, "learning_rate": 1.1260902002262485e-08, "loss": 0.8492, "step": 6993 }, { "epoch": 0.9854174004931314, "grad_norm": 0.9937188552919859, "learning_rate": 1.1045427011263653e-08, "loss": 0.8101, "step": 6994 }, { "epoch": 0.9855582951743571, "grad_norm": 0.9613233796576777, "learning_rate": 1.083203237814634e-08, "loss": 0.7609, "step": 6995 }, { "epoch": 0.9856991898555829, "grad_norm": 0.94836479815138, "learning_rate": 1.062071814735388e-08, "loss": 0.8747, "step": 6996 }, { "epoch": 0.9858400845368087, "grad_norm": 1.1699837153605233, "learning_rate": 1.0411484362896629e-08, "loss": 0.4545, "step": 6997 }, { "epoch": 0.9859809792180345, "grad_norm": 1.3105512638411496, "learning_rate": 1.0204331068348617e-08, "loss": 0.543, "step": 6998 }, { "epoch": 0.9861218738992603, "grad_norm": 1.0359002935474078, "learning_rate": 9.99925830685422e-09, "loss": 0.853, "step": 6999 }, { "epoch": 0.9862627685804861, "grad_norm": 1.0921218213890598, "learning_rate": 9.79626612112261e-09, "loss": 0.8334, "step": 7000 }, { "epoch": 0.9864036632617119, "grad_norm": 1.009838969407089, "learning_rate": 9.59535455343108e-09, "loss": 0.8514, "step": 7001 }, { "epoch": 0.9865445579429376, "grad_norm": 0.8759690706218888, "learning_rate": 9.396523645620604e-09, "loss": 0.8286, "step": 7002 }, { "epoch": 0.9866854526241634, "grad_norm": 1.2124614074300106, "learning_rate": 9.19977343910361e-09, "loss": 0.4563, "step": 7003 }, { "epoch": 0.9868263473053892, "grad_norm": 1.0377580308881866, "learning_rate": 9.005103974853991e-09, "loss": 0.8745, "step": 7004 }, { "epoch": 0.986967241986615, "grad_norm": 1.053527554937543, "learning_rate": 8.81251529341598e-09, "loss": 0.8774, "step": 7005 }, { "epoch": 0.9871081366678408, "grad_norm": 0.9751284024769862, "learning_rate": 8.622007434899715e-09, "loss": 0.8389, "step": 7006 }, { "epoch": 0.9872490313490666, "grad_norm": 1.117315349269672, "learning_rate": 8.433580438980127e-09, "loss": 0.8332, "step": 7007 }, { "epoch": 0.9873899260302924, "grad_norm": 1.0224967637971702, "learning_rate": 8.247234344901378e-09, "loss": 0.8633, "step": 7008 }, { "epoch": 0.9875308207115181, "grad_norm": 1.247814602988564, "learning_rate": 8.062969191471315e-09, "loss": 0.4636, "step": 7009 }, { "epoch": 0.9876717153927439, "grad_norm": 1.4347774890773701, "learning_rate": 7.880785017068126e-09, "loss": 0.5031, "step": 7010 }, { "epoch": 0.9878126100739697, "grad_norm": 1.0156544218087764, "learning_rate": 7.700681859632575e-09, "loss": 0.8758, "step": 7011 }, { "epoch": 0.9879535047551955, "grad_norm": 1.0785006709365965, "learning_rate": 7.522659756675765e-09, "loss": 0.7905, "step": 7012 }, { "epoch": 0.9880943994364213, "grad_norm": 1.2261659940339138, "learning_rate": 7.346718745272485e-09, "loss": 0.5025, "step": 7013 }, { "epoch": 0.9882352941176471, "grad_norm": 1.2374745581252287, "learning_rate": 7.172858862065646e-09, "loss": 0.519, "step": 7014 }, { "epoch": 0.9883761887988728, "grad_norm": 1.1089286957983366, "learning_rate": 7.001080143264061e-09, "loss": 0.459, "step": 7015 }, { "epoch": 0.9885170834800986, "grad_norm": 1.1058216558554717, "learning_rate": 6.831382624643556e-09, "loss": 0.8187, "step": 7016 }, { "epoch": 0.9886579781613244, "grad_norm": 0.9609435720369973, "learning_rate": 6.663766341545863e-09, "loss": 0.8013, "step": 7017 }, { "epoch": 0.9887988728425502, "grad_norm": 1.1014494626249498, "learning_rate": 6.498231328880833e-09, "loss": 0.8221, "step": 7018 }, { "epoch": 0.988939767523776, "grad_norm": 1.3387777271902925, "learning_rate": 6.334777621123111e-09, "loss": 0.6326, "step": 7019 }, { "epoch": 0.9890806622050018, "grad_norm": 1.1583878292638992, "learning_rate": 6.173405252314357e-09, "loss": 0.8759, "step": 7020 }, { "epoch": 0.9892215568862276, "grad_norm": 1.0054814727822907, "learning_rate": 6.0141142560632415e-09, "loss": 0.8059, "step": 7021 }, { "epoch": 0.9893624515674533, "grad_norm": 1.2812503961723167, "learning_rate": 5.8569046655443385e-09, "loss": 0.455, "step": 7022 }, { "epoch": 0.9895033462486791, "grad_norm": 1.269974095731388, "learning_rate": 5.7017765134992355e-09, "loss": 0.5401, "step": 7023 }, { "epoch": 0.9896442409299049, "grad_norm": 0.9936039882075447, "learning_rate": 5.5487298322354225e-09, "loss": 0.8635, "step": 7024 }, { "epoch": 0.9897851356111307, "grad_norm": 1.1593298700787869, "learning_rate": 5.397764653627402e-09, "loss": 0.471, "step": 7025 }, { "epoch": 0.9899260302923565, "grad_norm": 1.0992591664473064, "learning_rate": 5.2488810091166906e-09, "loss": 0.4327, "step": 7026 }, { "epoch": 0.9900669249735823, "grad_norm": 0.9755308137203725, "learning_rate": 5.1020789297107075e-09, "loss": 0.8232, "step": 7027 }, { "epoch": 0.9902078196548081, "grad_norm": 1.223453511189582, "learning_rate": 4.957358445982774e-09, "loss": 0.4925, "step": 7028 }, { "epoch": 0.9903487143360338, "grad_norm": 1.3370863334340037, "learning_rate": 4.814719588072114e-09, "loss": 0.5231, "step": 7029 }, { "epoch": 0.9904896090172596, "grad_norm": 1.2242744512979082, "learning_rate": 4.674162385687186e-09, "loss": 0.4521, "step": 7030 }, { "epoch": 0.9906305036984854, "grad_norm": 0.9864863456778591, "learning_rate": 4.535686868101241e-09, "loss": 0.8141, "step": 7031 }, { "epoch": 0.9907713983797112, "grad_norm": 0.990808493700851, "learning_rate": 4.399293064153432e-09, "loss": 0.848, "step": 7032 }, { "epoch": 0.990912293060937, "grad_norm": 0.9614418775182388, "learning_rate": 4.264981002249924e-09, "loss": 0.8423, "step": 7033 }, { "epoch": 0.9910531877421628, "grad_norm": 1.2722099131850182, "learning_rate": 4.132750710363898e-09, "loss": 0.5648, "step": 7034 }, { "epoch": 0.9911940824233885, "grad_norm": 1.119701623025157, "learning_rate": 4.002602216033325e-09, "loss": 0.8854, "step": 7035 }, { "epoch": 0.9913349771046143, "grad_norm": 1.182472314828846, "learning_rate": 3.8745355463643e-09, "loss": 0.8334, "step": 7036 }, { "epoch": 0.9914758717858401, "grad_norm": 1.1755072589668722, "learning_rate": 3.7485507280288216e-09, "loss": 0.8263, "step": 7037 }, { "epoch": 0.9916167664670659, "grad_norm": 1.2501768763131833, "learning_rate": 3.62464778726479e-09, "loss": 0.527, "step": 7038 }, { "epoch": 0.9917576611482917, "grad_norm": 1.1667423765151155, "learning_rate": 3.5028267498782297e-09, "loss": 0.4368, "step": 7039 }, { "epoch": 0.9918985558295175, "grad_norm": 1.0928897786911616, "learning_rate": 3.383087641238847e-09, "loss": 0.8184, "step": 7040 }, { "epoch": 0.9920394505107433, "grad_norm": 1.0080824519673912, "learning_rate": 3.265430486284471e-09, "loss": 0.8236, "step": 7041 }, { "epoch": 0.992180345191969, "grad_norm": 1.1593783967405586, "learning_rate": 3.1498553095199446e-09, "loss": 0.8143, "step": 7042 }, { "epoch": 0.9923212398731948, "grad_norm": 1.0140410768694603, "learning_rate": 3.036362135014903e-09, "loss": 0.8742, "step": 7043 }, { "epoch": 0.9924621345544206, "grad_norm": 1.0613222029065514, "learning_rate": 2.9249509864059945e-09, "loss": 0.8097, "step": 7044 }, { "epoch": 0.9926030292356464, "grad_norm": 1.0778755009497998, "learning_rate": 2.8156218868957696e-09, "loss": 0.8323, "step": 7045 }, { "epoch": 0.9927439239168722, "grad_norm": 1.1447182494063464, "learning_rate": 2.7083748592560135e-09, "loss": 0.81, "step": 7046 }, { "epoch": 0.992884818598098, "grad_norm": 0.9243991713684226, "learning_rate": 2.603209925819972e-09, "loss": 0.8473, "step": 7047 }, { "epoch": 0.9930257132793237, "grad_norm": 1.2216900772717434, "learning_rate": 2.500127108491235e-09, "loss": 0.4337, "step": 7048 }, { "epoch": 0.9931666079605495, "grad_norm": 1.063069716699998, "learning_rate": 2.3991264287381854e-09, "loss": 0.8318, "step": 7049 }, { "epoch": 0.9933075026417753, "grad_norm": 1.242750165360615, "learning_rate": 2.3002079075973293e-09, "loss": 0.4797, "step": 7050 }, { "epoch": 0.993448397323001, "grad_norm": 1.0036962880510416, "learning_rate": 2.2033715656666342e-09, "loss": 0.8944, "step": 7051 }, { "epoch": 0.9935892920042269, "grad_norm": 0.9520353891307288, "learning_rate": 2.1086174231166323e-09, "loss": 0.7927, "step": 7052 }, { "epoch": 0.9937301866854527, "grad_norm": 0.9768243818122893, "learning_rate": 2.015945499680427e-09, "loss": 0.8369, "step": 7053 }, { "epoch": 0.9938710813666785, "grad_norm": 0.9638087631238411, "learning_rate": 1.9253558146592464e-09, "loss": 0.8271, "step": 7054 }, { "epoch": 0.9940119760479041, "grad_norm": 1.2740324463513224, "learning_rate": 1.8368483869179999e-09, "loss": 0.5753, "step": 7055 }, { "epoch": 0.99415287072913, "grad_norm": 0.9624737535676786, "learning_rate": 1.75042323489083e-09, "loss": 0.83, "step": 7056 }, { "epoch": 0.9942937654103557, "grad_norm": 1.0649701766230755, "learning_rate": 1.6660803765777833e-09, "loss": 0.3971, "step": 7057 }, { "epoch": 0.9944346600915815, "grad_norm": 1.0307705841917445, "learning_rate": 1.5838198295436979e-09, "loss": 0.8196, "step": 7058 }, { "epoch": 0.9945755547728073, "grad_norm": 1.018093132316932, "learning_rate": 1.5036416109204255e-09, "loss": 0.8754, "step": 7059 }, { "epoch": 0.9947164494540331, "grad_norm": 1.1581024122899086, "learning_rate": 1.4255457374068305e-09, "loss": 0.4857, "step": 7060 }, { "epoch": 0.994857344135259, "grad_norm": 1.1248329138049513, "learning_rate": 1.3495322252687904e-09, "loss": 0.8557, "step": 7061 }, { "epoch": 0.9949982388164846, "grad_norm": 0.969320275103237, "learning_rate": 1.275601090334755e-09, "loss": 0.7633, "step": 7062 }, { "epoch": 0.9951391334977104, "grad_norm": 1.2157232377783436, "learning_rate": 1.2037523480035173e-09, "loss": 0.5157, "step": 7063 }, { "epoch": 0.9952800281789362, "grad_norm": 0.9035719376935141, "learning_rate": 1.133986013239774e-09, "loss": 0.8102, "step": 7064 }, { "epoch": 0.995420922860162, "grad_norm": 1.3143332209163805, "learning_rate": 1.066302100570793e-09, "loss": 0.5118, "step": 7065 }, { "epoch": 0.9955618175413878, "grad_norm": 0.9740465625121575, "learning_rate": 1.0007006240952965e-09, "loss": 0.8325, "step": 7066 }, { "epoch": 0.9957027122226136, "grad_norm": 1.2990272602210549, "learning_rate": 9.37181597474579e-10, "loss": 0.5281, "step": 7067 }, { "epoch": 0.9958436069038393, "grad_norm": 1.058122593543831, "learning_rate": 8.75745033938058e-10, "loss": 0.8086, "step": 7068 }, { "epoch": 0.9959845015850651, "grad_norm": 1.0027185381365815, "learning_rate": 8.163909462799435e-10, "loss": 0.8447, "step": 7069 }, { "epoch": 0.9961253962662909, "grad_norm": 1.2241688468353806, "learning_rate": 7.59119346862569e-10, "loss": 0.5093, "step": 7070 }, { "epoch": 0.9962662909475167, "grad_norm": 1.0256348563037496, "learning_rate": 7.039302476141708e-10, "loss": 0.8265, "step": 7071 }, { "epoch": 0.9964071856287425, "grad_norm": 1.1715134367676752, "learning_rate": 6.508236600266671e-10, "loss": 0.4495, "step": 7072 }, { "epoch": 0.9965480803099683, "grad_norm": 0.9853825397077689, "learning_rate": 5.997995951623203e-10, "loss": 0.8863, "step": 7073 }, { "epoch": 0.9966889749911941, "grad_norm": 1.0571485987476181, "learning_rate": 5.50858063647075e-10, "loss": 0.8467, "step": 7074 }, { "epoch": 0.9968298696724198, "grad_norm": 1.1276910949647112, "learning_rate": 5.039990756738889e-10, "loss": 0.8444, "step": 7075 }, { "epoch": 0.9969707643536456, "grad_norm": 1.0222689759414563, "learning_rate": 4.5922264100162206e-10, "loss": 0.9029, "step": 7076 }, { "epoch": 0.9971116590348714, "grad_norm": 1.385869915104501, "learning_rate": 4.165287689550379e-10, "loss": 0.6484, "step": 7077 }, { "epoch": 0.9972525537160972, "grad_norm": 1.021848681818878, "learning_rate": 3.7591746842702283e-10, "loss": 0.8907, "step": 7078 }, { "epoch": 0.997393448397323, "grad_norm": 1.1659345167910182, "learning_rate": 3.3738874787525577e-10, "loss": 0.4183, "step": 7079 }, { "epoch": 0.9975343430785488, "grad_norm": 0.9790341427552397, "learning_rate": 3.009426153233186e-10, "loss": 0.8295, "step": 7080 }, { "epoch": 0.9976752377597745, "grad_norm": 1.1207430257826463, "learning_rate": 2.665790783629163e-10, "loss": 0.8733, "step": 7081 }, { "epoch": 0.9978161324410003, "grad_norm": 1.2137367284955034, "learning_rate": 2.342981441494363e-10, "loss": 0.4413, "step": 7082 }, { "epoch": 0.9979570271222261, "grad_norm": 1.0427512390844513, "learning_rate": 2.0409981940638924e-10, "loss": 0.8114, "step": 7083 }, { "epoch": 0.9980979218034519, "grad_norm": 1.0249062449698725, "learning_rate": 1.7598411042429876e-10, "loss": 0.848, "step": 7084 }, { "epoch": 0.9982388164846777, "grad_norm": 0.9471224106851835, "learning_rate": 1.4995102305626062e-10, "loss": 0.7853, "step": 7085 }, { "epoch": 0.9983797111659035, "grad_norm": 1.1355679141590824, "learning_rate": 1.2600056272571438e-10, "loss": 0.7787, "step": 7086 }, { "epoch": 0.9985206058471293, "grad_norm": 1.0990866311804908, "learning_rate": 1.0413273441978177e-10, "loss": 0.8904, "step": 7087 }, { "epoch": 0.998661500528355, "grad_norm": 1.0912792399732667, "learning_rate": 8.434754269370793e-11, "loss": 0.8936, "step": 7088 }, { "epoch": 0.9988023952095808, "grad_norm": 0.9768965549076668, "learning_rate": 6.664499166864069e-11, "loss": 0.8093, "step": 7089 }, { "epoch": 0.9989432898908066, "grad_norm": 1.1412635292971536, "learning_rate": 5.102508502941028e-11, "loss": 0.8875, "step": 7090 }, { "epoch": 0.9990841845720324, "grad_norm": 1.2179268518565325, "learning_rate": 3.748782603119061e-11, "loss": 0.5269, "step": 7091 }, { "epoch": 0.9992250792532582, "grad_norm": 0.9142673898113908, "learning_rate": 2.6033217491727715e-11, "loss": 0.8026, "step": 7092 }, { "epoch": 0.999365973934484, "grad_norm": 1.2591334052945566, "learning_rate": 1.6661261796890872e-11, "loss": 0.7706, "step": 7093 }, { "epoch": 0.9995068686157098, "grad_norm": 1.0411976065810589, "learning_rate": 9.371960899562383e-12, "loss": 0.9012, "step": 7094 }, { "epoch": 0.9996477632969355, "grad_norm": 1.0114773266745671, "learning_rate": 4.1653163163069e-12, "loss": 0.8158, "step": 7095 }, { "epoch": 0.9997886579781613, "grad_norm": 0.950658831667573, "learning_rate": 1.0413291329225417e-12, "loss": 0.8264, "step": 7096 }, { "epoch": 0.9999295526593871, "grad_norm": 1.6758240748711644, "learning_rate": 0.0, "loss": 0.7465, "step": 7097 }, { "epoch": 0.9999295526593871, "step": 7097, "total_flos": 1.1577735833911296e+16, "train_loss": 0.7757017383470289, "train_runtime": 39614.5541, "train_samples_per_second": 22.933, "train_steps_per_second": 0.179 } ], "logging_steps": 1.0, "max_steps": 7097, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.1577735833911296e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }